use new optparser

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1163 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-01-03 19:44:34 +00:00
parent 558d8ad2e7
commit 9f191a920a

View file

@ -42,112 +42,7 @@ _profile = "linkchecker.prof"
# main usage text
Usage = i18n._("""USAGE\tlinkchecker [options] file-or-url...
OPTIONS
For single-letter option arguments the space is not a necessity. So
'-o colored' is the same as '-ocolored'.
-a, --anchors
Check HTTP anchor references. This option applies to both internal
and external urls. Default is don't check anchors.
This option implies -w because anchor errors are always warnings.
-C, --cookies
Accept and send HTTP cookies according to RFC 2109. Only cookies
which are sent back to the originating server are accepted.
Sent and accepted cookies are provided as additional logging
information.
-d, --denyallow
Swap checking order to external/internal. Default checking order
is internal/external.
-D, --debug
Print debugging information. Provide this option multiple times
for even more debugging information.
-e regex, --extern=regex
Assume urls that match the given expression as external.
Only internal HTML links are checked recursively.
-f file, --config=file
Use file as configuration file. As default LinkChecker first
searches /etc/linkcheckerrc and then ~/.linkcheckerrc
(under Windows <path-to-program>\\linkcheckerrc).
-F type[/filename], --file-output=type[/filename]
Same as -o, but write to a file linkchecker-out.<type>
or <filename> if specified. If the file already exists, it
is overwritten. You can specify this option more than once.
There is no file output for the blacklist logger. Default is
no file output.
-I, --interactive
Ask for url if none are given on the commandline.
-i regex, --intern=regex
Assume URLs that match the given expression as internal.
LinkChecker descends recursively only to internal URLs, not to
external.
-h, --help
Help me! Print usage information for this program.
-N server, --nntp-server=server
Specify an NNTP server for 'news:...' links. Default is the
environment variable NNTP_SERVER. If no host is given,
only the syntax of the link is checked.
--no-anchor-caching
Treat url#anchora and url#anchorb as equal on caching. This
is the default browser behaviour, but it's not specified in
the URI specification. Use with care.
-o type, --output=type
Specify output type as %s.
Default type is text.
-p pwd, --password=pwd
Try password pwd for HTTP and FTP authorization.
Default password is 'joe@'. See also -u.
-P secs, --pause=secs
Pause <secs> seconds between each url check. This option
implies -t0.
Default is no pause between requests.
--profile
Write profiling data into a file named %s in the
current working directory.
See also --viewprof.
-q, --quiet
Quiet operation. This is only useful with -F.
-r depth, --recursion-level=depth
Check recursively all links up to given depth. A negative depth
will enable inifinite recursion.
Default depth is 1.
-s, --strict
Check only syntax of external links, do not try to connect to them.
For local file urls, only local files are internal. For
http and ftp urls, all urls at the same domain name are internal.
--status
Print check status every 5 seconds to stderr.
-t num, --threads=num
Generate no more than num threads. Default number of threads is 5.
To disable threading specify a non-positive number.
--timeout=secs
Set the timeout for TCP connection attempts in seconds. The default
timeout is 30 seconds.
-u name, --user=name
Try username name for HTTP and FTP authorization.
Default is 'anonymous'. See also -p.
-V, --version
Print version and exit.
-v, --verbose
Log all checked URLs (implies -w). Default is to log only invalid
URLs.
--viewprof
Print out previously generated profiling data. See also --profile.
-w, --warnings
Log warnings.
-W regex, --warning-regex=regex
Define a regular expression which prints a warning if it matches
any content of the checked link.
This applies of course only to pages which are valid, so we can
get their content.
Use this to check for pages that contain some form of error
message, for example 'This page has moved' or 'Oracle
Application Server error'.
This option implies -w.
--warning-size-bytes=bytes
Print a warning if content size is available and exceeds the given
number of bytes.
This option implies -w.
""") % (LoggerKeys, _profile)
""")
Notes = i18n._("""NOTES
o A ! before any regex negates it. So '!^mailto:' matches everything but
@ -220,193 +115,263 @@ def viewprof ():
sys.exit(0)
# Read command line arguments
try:
# Note: cut out the name of the script
options, args = getopt.getopt(sys.argv[1:],
"adCDe:f:F:hIi:N:o:p:P:qr:Rst:u:VvwW:", # short options
["anchors", # long options
"config=",
"cookies",
"debug",
"extern=",
"file-output=",
"nntp-server=",
"help",
"interactive",
"intern=",
"denyallow",
"output=",
"password=",
"pause=",
"profile",
"quiet",
"recursion-level=",
"no-anchor-caching",
"wischiwaschi",
"robots-txt",
"strict",
"status",
"threads=",
"timeout=",
"user=",
"version",
"verbose",
"viewprof",
"warnings",
"warning-regex="])
except getopt.error:
type, value = sys.exc_info()[:2]
printUsage(value)
# Build the command line parser with optparse and parse sys.argv.
#
# Every option stores its value under an explicit ``dest`` name; options
# that were not given on the command line are left as None, so the code
# that applies the parsed options can test ``options.<dest> is not None``.
# Pure flags use action="store_true"/"store_false"/"count" so that they do
# not consume the following command line word as an argument (optparse's
# default action is "store", which requires an argument).
from optparse import OptionParser
optparser = OptionParser()
optparser.add_option("-a", "--anchors", action="store_true", dest="anchors",
    help=i18n._(
"""Check HTTP anchor references. This option applies to both internal
and external urls. Default is don't check anchors.
This option implies -w because anchor errors are always warnings."""))
optparser.add_option("-C", "--cookies", action="store_true", dest="cookies",
    help=i18n._(
"""Accept and send HTTP cookies according to RFC 2109. Only cookies
which are sent back to the originating server are accepted.
Sent and accepted cookies are provided as additional logging
information."""))
# -d is a plain flag: without an explicit action optparse would treat it
# as "store" and demand an argument.
optparser.add_option("-d", "--denyallow", action="store_true", dest="denyallow",
    help=i18n._(
"""Swap checking order to external/internal. Default checking order
is internal/external."""))
optparser.add_option("-e", "--extern", type="string", action="append", dest="extern",
    help=i18n._(
"""Assume urls that match the given expression as external.
Only internal HTML links are checked recursively."""))
optparser.add_option("-f", "--config", type="string", dest="configfile",
    help=i18n._(
"""Use file as configuration file. As default LinkChecker first
searches /etc/linkcheckerrc and then ~/.linkcheckerrc
(under Windows <path-to-program>\\linkcheckerrc)."""))
# NOTE(review): the help text says -F may be given more than once, but the
# option uses the plain "store" action, so only the last value is kept.
# Switching to action="append" also requires the code that consumes
# options.fileoutput to iterate over a list -- confirm and change together.
optparser.add_option("-F", "--file-output", type="string", dest="fileoutput",
    help=i18n._(
"""type[/filename]
Same as -o, but write to a file linkchecker-out.<type>
or <filename> if specified. If the file already exists, it
is overwritten. You can specify this option more than once.
There is no file output for the blacklist logger. Default is
no file output."""))
# action="count": options.debug is the number of -D occurrences, or None.
optparser.add_option("-D", "--debug", action="count", dest="debug",
    help=i18n._(
"""Print debugging information. Provide this option multiple times
for even more debugging information."""))
optparser.add_option("-I", "--interactive", action="store_true", dest="interactive",
    help=i18n._(
"""Ask for url if none are given on the commandline."""))
optparser.add_option("-i", "--intern", type="string", action="append", dest="intern",
    help=i18n._(
"""Assume URLs that match the given expression as internal.
LinkChecker descends recursively only to internal URLs, not to
external."""))
optparser.add_option("-N", "--nntp-server", type="string", dest="nntpserver",
    help=i18n._(
"""Specify an NNTP server for 'news:...' links. Default is the
environment variable NNTP_SERVER. If no host is given,
only the syntax of the link is checked."""))
optparser.add_option("--no-anchor-caching", action="store_false", dest="anchorcaching",
    help=i18n._(
"""Treat url#anchora and url#anchorb as equal on caching. This
is the default browser behaviour, but it's not specified in
the URI specification. Use with care."""))
optparser.add_option("-o", "--output", type="string", dest="output",
    help=i18n._(
"""Specify output type as %s. Default type is text.""")%LoggerKeys)
# NOTE(review): the default password named in this help text should match
# the default the program actually uses when only -u is given -- verify.
optparser.add_option("-p", "--password", type="string", dest="password",
    help=i18n._(
"""Try password pwd for HTTP and FTP authorization.
Default password is 'joe@'. See also -u."""))
optparser.add_option("-P", "--pause", type="int", dest="pause",
    help=i18n._(
"""Pause <secs> seconds between each url check. This option implies -t0.
Default is no pause between requests."""))
optparser.add_option("--profile", action="store_true", dest="profile",
    help=i18n._(
"""Write profiling data into a file named %s in the
current working directory. See also --viewprof.""")%_profile)
optparser.add_option("-q", "--quiet", action="store_true", dest="quiet",
    help=i18n._(
"""Quiet operation. This is only useful with -F."""))
optparser.add_option("-r", "--recursion-level", type="int", dest="recursionlevel",
    help=i18n._(
"""Check recursively all links up to given depth. A negative depth
will enable infinite recursion. Default depth is 1."""))
optparser.add_option("-s", "--strict", action="store_true", dest="strict",
    help=i18n._(
"""Check only syntax of external links, do not try to connect to them.
For local file urls, only local files are internal. For
http and ftp urls, all urls at the same domain name are internal."""))
optparser.add_option("--status", action="store_true", dest="status",
    help=i18n._(
"""Print check status every 5 seconds to stderr."""))
optparser.add_option("-t", "--threads", type="int", dest="threads",
    help=i18n._(
"""Generate no more than num threads. Default number of threads is 5.
To disable threading specify a non-positive number."""))
optparser.add_option("--timeout", type="int", dest="timeout",
    help=i18n._(
"""Set the timeout for TCP connection attempts in seconds. The default
timeout is 30 seconds."""))
optparser.add_option("-u", "--user", type="string", dest="username",
    help=i18n._(
"""Try username name for HTTP and FTP authorization.
Default is 'anonymous'. See also -p."""))
# -V is a plain flag as well; see the note on -d above.
optparser.add_option("-V", "--version", action="store_true", dest="version",
    help=i18n._(
"""Print version and exit."""))
optparser.add_option("-v", "--verbose", action="store_true", dest="verbose",
    help=i18n._(
"""Log all checked URLs (implies -w). Default is to log only invalid
URLs."""))
optparser.add_option("--viewprof", action="store_true", dest="viewprof",
    help=i18n._(
"""Print out previously generated profiling data. See also --profile."""))
optparser.add_option("-w", "--warnings", action="store_true", dest="warnings",
    help=i18n._("""Log warnings."""))
optparser.add_option("-W", "--warning-regex", type="string", dest="warningregex",
    help=i18n._(
"""Define a regular expression which prints a warning if it matches
any content of the checked link.
This applies of course only to pages which are valid, so we can
get their content.
Use this to check for pages that contain some form of error
message, for example 'This page has moved' or 'Oracle
Application Server error'.
This option implies -w."""))
# type="int" so the byte threshold reaches the configuration as a number
# instead of the raw command line string.
optparser.add_option("--warning-size-bytes", type="int", dest="warningsizebytes",
    help=i18n._(
"""Print a warning if content size is available and exceeds the given
number of bytes. This option implies -w."""))
# --wischiwaschi is deliberately not registered with the parser (optparse
# would abort on it as an unknown option), so look for it by hand before
# parsing.
if "--wischiwaschi" in sys.argv:
    from linkcheck import util1
    util1.abbuzze()
    sys.exit(0)
(options, args) = optparser.parse_args()
# set debug level as early as possible
for opt,arg in options:
if opt=="-D" or opt=="--debug":
set_debuglevel(get_debuglevel()+1)
if options.debug is not None:
set_debuglevel(options.debug)
debug(BRING_IT_ON, "Python", sys.version, "on", sys.platform)
# read configuration from config files
# config object
config = linkcheck.Config.Configuration()
# read configuration from config files
configfiles = []
for opt,arg in options:
if opt=="-f" or opt=="--config":
configfiles.append(arg)
if options.configfile:
configfiles.append(options.configfile)
config.read(configfiles)
# apply commandline options and arguments
_user = "anonymous"
_password = "guest@"
constructauth = False
do_profile = False
for opt,arg in options:
if opt=="-a" or opt=="--anchors":
config["anchors"] = True
config["warnings"] = True
elif opt=="-e" or opt=="--extern":
config["externlinks"].append(linkcheck.getLinkPat(arg))
elif opt=="-h" or opt=="--help":
printHelp()
elif opt=="-o" or opt=="--output":
if linkcheck.log.Loggers.has_key(arg):
config['log'] = config.newLogger(arg)
else:
printUsage((i18n._("Illegal argument '%s' for option ") % arg) +\
"'-o, --output'")
elif opt=="-F" or opt=="--file-output":
ns = {'fileoutput': 1}
try:
type, ns['filename'] = arg.split('/', 1)
if not ns['filename']: raise ValueError
except ValueError: type = arg
if linkcheck.log.Loggers.has_key(type) and type != "blacklist":
config['fileoutput'].append(config.newLogger(type, ns))
else:
printUsage((i18n._("Illegal argument '%s' for option ") % arg) +\
"'-F, --file-output'")
elif opt=="-I" or opt=="--interactive":
config['interactive'] = True
elif opt=="-i" or opt=="--intern":
config["internlinks"].append(linkcheck.getLinkPat(arg))
elif opt=="-l" or opt=="--denyallow":
config["denyallow"] = True
elif opt=="-N" or opt=="--nntp-server":
config["nntpserver"] = arg
elif opt=="--no-anchor-caching":
config["anchorcaching"] = False
elif opt=="-p" or opt=="--password":
_password = arg
constructauth = True
elif opt=="-P" or opt=="--pause":
try:
wait = int(arg)
except ValueError:
printUsage(i18n._("Illegal argument %r for option %s") % \
(arg, "'-P, --pause'"))
if wait >= 0:
config["wait"] = wait
else:
printUsage(i18n._("Illegal argument %r for option %s") % \
(arg, "'-P, --pause'"))
elif opt=="--profile":
do_profile = True
elif opt=="-q" or opt=="--quiet":
config["quiet"] = True
elif opt=="-r" or opt=="--recursion-level":
try:
depth = int(arg)
if depth >= 0:
config["recursionlevel"] = depth
else:
config["recursionlevel"] = -1
except ValueError:
printUsage(i18n._("Illegal argument %r for option %s") % \
(arg, "'-r, --recursion-level'"))
# robots.txt is now default, so ignore this option
elif opt=="-R" or opt=="--robots-txt": pass
elif opt=="-s" or opt=="--strict":
config["strict"] = True
elif opt=="--status":
config['status'] = True
elif opt=="-t" or opt=="--threads":
try:
config.setThreads(int(arg))
except ValueError:
printUsage(i18n._("Illegal argument %r for option %s") % \
(arg, "'-t, --threads'"))
elif opt=="--timeout":
try:
timeout = int(arg)
if timeout <= 0:
printUsage(i18n._("Illegal argument %r for option %s") % \
(arg, "'--timeout'"))
socket.setdefaulttimeout(timeout)
except ValueError:
printUsage(i18n._("Illegal argument %r for option %s") % \
(arg, "'--timeout'"))
elif opt=="-u" or opt=="--user":
_user = arg
constructauth = True
elif opt=="-V" or opt=="--version":
printVersion()
elif opt=="-v" or opt=="--verbose":
if options.anchors is not None:
config["anchors"] = options.anchors
config["warnings"] = True
if options.extern:
config["externlinks"].extend([linkcheck.getLinkPat(arg) for arg in options.extern])
if options.output:
if linkcheck.log.Loggers.has_key(options.output):
config['log'] = config.newLogger(options.output)
else:
printUsage(i18n._("Illegal argument %r for option %s") % \
(options.output, "'-o, --output'"))
if options.fileoutput:
ns = {'fileoutput': 1}
try:
ftype, ns['filename'] = options.fileoutput.split('/', 1)
if not ns['filename']: raise ValueError
except ValueError:
ftype = options.fileoutput
if linkcheck.log.Loggers.has_key(ftype) and ftype != "blacklist":
config['fileoutput'].append(config.newLogger(ftype, ns))
else:
printUsage(i18n._("Illegal argument %r for option %s") % \
(options.fileoutput, "'-F, --file-output'"))
if options.interactive is not None:
config['interactive'] = options.interactive
if options.intern:
config["internlinks"].extend([linkcheck.getLinkPat(arg) for arg in options.intern])
if options.denyallow is not None:
config["denyallow"] = options.denyallow
if options.nntpserver:
config["nntpserver"] = options.nntpserver
if options.anchorcaching is not None:
config["anchorcaching"] = options.anchorcaching
if options.password is not None:
_password = options.password
constructauth = True
if options.pause is not None:
if options.pause >= 0:
config["wait"] = options.pause
else:
printUsage(i18n._("Illegal argument %d for option %s") % \
(options.pause, "'-P, --pause'"))
if options.profile is not None:
do_profile = options.profile
if options.quiet is not None:
config["quiet"] = options.quiet
if options.recursionlevel is not None:
config["recursionlevel"] = options.recursionlevel
if options.strict is not None:
config["strict"] = options.strict
if options.status is not None:
config['status'] = options.status
if options.threads is not None:
config.setThreads(options.threads)
if options.timeout is not None:
if options.timeout > 0:
socket.setdefaulttimeout(options.timeout)
else:
printUsage(i18n._("Illegal argument %r for option %s") % \
(options.timeout, "'--timeout'"))
if options.username is not None:
_user = options.username
constructauth = True
if options.version is not None:
printVersion()
if options.verbose is not None:
if options.verbose:
config["verbose"] = True
config["warnings"] = True
elif opt=="--viewprof":
viewprof()
elif opt=="--wischiwaschi":
from linkcheck import util1
util1.abbuzze()
sys.exit(0)
elif opt=="-w" or opt=="--warnings":
config["warnings"] = True
elif opt=="-W" or opt=="--warning-regex":
config["warningregex"] = re.compile(arg)
config["warnings"] = True
elif opt=="-C" or opt=="--cookies":
config['cookies'] = True
if options.viewprof:
viewprof()
if options.warnings is not None:
config["warnings"] = options.warnings
if options.warningregex is not None:
config["warningregex"] = re.compile(options.warningregex)
config["warnings"] = True
if options.warningsizebytes is not None:
config["warnsizebytes"] = options.warningsizebytes
if options.cookies is not None:
config['cookies'] = options.cookies
if constructauth:
config["authentication"].insert(0, {'pattern': re.compile(".*"),
'user': _user,