mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-22 15:14:44 +00:00
regression test suite
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@229 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
83a0846fef
commit
5d1c6ef00e
24 changed files with 609 additions and 306 deletions
|
|
@ -6,6 +6,7 @@ dist
|
|||
foo
|
||||
MANIFEST
|
||||
VERSION
|
||||
LinkCheckerConf.py
|
||||
linkcheckerConf.py
|
||||
js
|
||||
locale
|
||||
Packages.gz
|
||||
|
|
|
|||
6
Makefile
6
Makefile
|
|
@ -68,11 +68,7 @@ uploadpull: distclean dist package files VERSION
|
|||
ssh -C -t shell1.sourceforge.net "cd /home/groups/$(PACKAGE) && make pull"
|
||||
|
||||
test:
|
||||
rm -f test/*.result
|
||||
@for i in test/*.html; do \
|
||||
echo "Testing $$i. Results are in $$i.result"; \
|
||||
./$(PACKAGE) -r1 -ucalvin -pcalvin -otext -N"news.rz.uni-sb.de" -v -a $$i > $$i.result 2>&1; \
|
||||
done
|
||||
python2 test/regrtest.py
|
||||
|
||||
locale:
|
||||
$(MAKE) -C po
|
||||
|
|
|
|||
5
debian/changelog
vendored
5
debian/changelog
vendored
|
|
@ -1,9 +1,10 @@
|
|||
linkchecker (1.3.0) unstable; urgency=low
|
||||
|
||||
* require Python 2.0
|
||||
* require and use Python >= 2.0
|
||||
* fix agent matching in robotparser2.py
|
||||
* added more LinkPatterns (ripped from HTML::Tagset.pm)
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Thu, 1 Feb 2001 01:51:27 +0100
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Fri, 9 Feb 2001 10:51:24 +0100
|
||||
|
||||
linkchecker (1.2.14) unstable; urgency=low
|
||||
|
||||
|
|
|
|||
2
debian/control
vendored
2
debian/control
vendored
|
|
@ -2,7 +2,7 @@ Source: linkchecker
|
|||
Section: web
|
||||
Priority: optional
|
||||
Maintainer: Bastian Kleineidam <calvin@users.sourceforge.net>
|
||||
Build-Depends: python2-base (>= 2.0), python2-base (<= 2.0), python2-dev (>= 1.5.2), python2-dev (<= 2.0), debhelper
|
||||
Build-Depends: python2-base (>= 2.0), python2-base (<= 2.0), python2-dev (>= 1.5.2), python2-dev (<= 2.0), debhelper (>= 3.0.0)
|
||||
Build-Depends-Indep: gettext
|
||||
Standards-Version: 3.2.1
|
||||
|
||||
|
|
|
|||
5
debian/rules
vendored
5
debian/rules
vendored
|
|
@ -60,7 +60,6 @@ binary-indep: build install
|
|||
|
||||
# Build architecture-dependent files here.
|
||||
binary-arch: build install
|
||||
# dh_testversion
|
||||
dh_testdir
|
||||
dh_testroot
|
||||
# dh_installdebconf
|
||||
|
|
@ -71,7 +70,7 @@ binary-arch: build install
|
|||
# dh_installpam
|
||||
# dh_installinit
|
||||
# dh_installcron
|
||||
dh_installmanpages
|
||||
dh_installman linkchecker.1
|
||||
# dh_installinfo
|
||||
# dh_undocumented linkchecker.1
|
||||
dh_installchangelogs
|
||||
|
|
@ -79,8 +78,6 @@ binary-arch: build install
|
|||
dh_strip
|
||||
dh_compress
|
||||
dh_fixperms
|
||||
# You may want to make some executables suid here.
|
||||
# dh_suidregister
|
||||
# dh_makeshlibs
|
||||
dh_installdeb
|
||||
# dh_perl
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ Loggers = {
|
|||
"csv": Logging.CSVLogger,
|
||||
"blacklist": Logging.BlacklistLogger,
|
||||
"xml": Logging.XMLLogger,
|
||||
"test": Logging.TestLogger,
|
||||
}
|
||||
# for easy printing: a comma separated logger list
|
||||
LoggerKeys = reduce(lambda x, y: x+", "+y, Loggers.keys())
|
||||
|
|
@ -102,9 +103,10 @@ class Configuration(UserDict.UserDict):
|
|||
'joe@')]
|
||||
self["proxy"] = getproxies()
|
||||
self["recursionlevel"] = 1
|
||||
self["robotstxt"] = 0
|
||||
self["robotstxt"] = 1
|
||||
self["strict"] = 0
|
||||
self["fileoutput"] = []
|
||||
self["loggingfields"] = "all"
|
||||
# Logger configurations
|
||||
self["text"] = {
|
||||
"filename": "linkchecker-out.txt",
|
||||
|
|
@ -152,6 +154,7 @@ class Configuration(UserDict.UserDict):
|
|||
self['xml'] = {
|
||||
"filename": "linkchecker-out.xml",
|
||||
}
|
||||
self['test'] = {} # no args for test logger
|
||||
# default values
|
||||
self['log'] = self.newLogger('text')
|
||||
self["quiet"] = 0
|
||||
|
|
@ -412,8 +415,9 @@ class Configuration(UserDict.UserDict):
|
|||
try: self["warnings"] = cfgparser.getboolean(section, "warnings")
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
filelist = string.split(cfgparser.get(section, "fileoutput"))
|
||||
filelist = string.split(cfgparser.get(section, "fileoutput"), ",")
|
||||
for arg in filelist:
|
||||
arg = string.strip(arg)
|
||||
# no file output for the blacklist Logger
|
||||
if Loggers.has_key(arg) and arg != "blacklist":
|
||||
self['fileoutput'].append(
|
||||
|
|
@ -424,6 +428,10 @@ class Configuration(UserDict.UserDict):
|
|||
for opt in cfgparser.options(key):
|
||||
try: self[key][opt] = cfgparser.get(key, opt)
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
self['loggingfields'] = map(string.strip, string.split(
|
||||
cfgparser.get(section, 'loggingfields'), ","))
|
||||
except ConfigParser.Error: pass
|
||||
|
||||
section="checking"
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -44,22 +44,22 @@ import Config, StringUtil
|
|||
import linkcheck
|
||||
_ = linkcheck._
|
||||
|
||||
# keywords
|
||||
KeyWords = ["Real URL",
|
||||
"Result",
|
||||
"Base",
|
||||
"Name",
|
||||
"Parent URL",
|
||||
"Info",
|
||||
"Warning",
|
||||
"D/L Time",
|
||||
"Check Time",
|
||||
"URL",
|
||||
]
|
||||
MaxIndent = max(map(lambda x: len(_(x)), KeyWords))+1
|
||||
LogFields = {
|
||||
"realurl": "Real URL",
|
||||
"result": "Result",
|
||||
"base": "Base",
|
||||
"name": "Name",
|
||||
"parenturl": "Parent URL",
|
||||
"info": "Info",
|
||||
"warning": "Warning",
|
||||
"downloadtime": "D/L Time",
|
||||
"checktime": "Check Time",
|
||||
"url": "URL",
|
||||
}
|
||||
MaxIndent = max(map(lambda x: len(_(x)), LogFields.values()))+1
|
||||
Spaces = {}
|
||||
for key in KeyWords:
|
||||
Spaces[key] = " "*(MaxIndent - len(_(key)))
|
||||
for key,value in LogFields.items():
|
||||
Spaces[key] = " "*(MaxIndent - len(_(value)))
|
||||
|
||||
EntityTable = {
|
||||
'<': '<',
|
||||
|
|
@ -99,7 +99,13 @@ class StandardLogger:
|
|||
self.fd = args['fd']
|
||||
else:
|
||||
self.fd = sys.stdout
|
||||
self.logfields = None # all fields
|
||||
if args.has_key('logfields'):
|
||||
if type(args['logfields']) == ListType:
|
||||
self.logfields = args
|
||||
|
||||
def logfield(self, name):
    """Tell whether the log field `name` should be written.

    self.logfields is None when no explicit field selection was
    configured, which means "all fields"; in that case every field is
    enabled.  Otherwise the field is enabled only if its name is
    contained in self.logfields.

    Returns a true value if the field should be logged, else a false
    value.
    """
    # None means "no restriction": the previous `self.logfields and ...`
    # expression evaluated to None here and so disabled every field.
    if self.logfields is None:
        return 1
    return name in self.logfields
|
||||
|
||||
def init(self):
|
||||
self.starttime = time.time()
|
||||
|
|
@ -111,14 +117,15 @@ class StandardLogger:
|
|||
|
||||
|
||||
def newUrl(self, urlData):
|
||||
self.fd.write("\n"+_("URL")+Spaces["URL"]+urlData.urlName)
|
||||
if urlData.cached:
|
||||
self.fd.write(_(" (cached)\n"))
|
||||
else:
|
||||
self.fd.write("\n")
|
||||
if urlData.name:
|
||||
self.fd.write(_("Name")+Spaces["Name"]+urlData.name+"\n")
|
||||
if urlData.parentName:
|
||||
if self.logfield('url'):
|
||||
self.fd.write("\n"+_(LogFields['url'])+Spaces['url']+urlData.urlName)
|
||||
if urlData.cached:
|
||||
self.fd.write(_(" (cached)\n"))
|
||||
else:
|
||||
self.fd.write("\n")
|
||||
if urlData.name and self.logfield('name'):
|
||||
self.fd.write(_(LogFields["name"])+Spaces["name"]+urlData.name+"\n")
|
||||
if urlData.parentName and self.logfield('parentname'):
|
||||
self.fd.write(_("Parent URL")+Spaces["Parent URL"]+
|
||||
urlData.parentName+_(", line ")+
|
||||
str(urlData.line)+"\n")
|
||||
|
|
@ -739,3 +746,33 @@ class CSVLogger(StandardLogger):
|
|||
self.fd.flush()
|
||||
self.fd = None
|
||||
|
||||
# Logger for the regression test: its methods either do nothing or write
# plain "key value" lines with the print statement.
class TestLogger:
|
||||
""" Output for regression test """
|
||||
# accepts any keyword arguments and ignores them
def __init__(self, **args):
|
||||
pass
|
||||
|
||||
# nothing to set up before logging starts
def init(self):
|
||||
pass
|
||||
|
||||
# print the attributes of urlData that are set, one "key value" line each,
# followed by either a "valid" or an "error" line
def newUrl(self, urlData):
|
||||
print 'url',urlData.urlName
|
||||
if urlData.cached:
|
||||
print "cached"
|
||||
if urlData.name:
|
||||
print "name",urlData.name
|
||||
if urlData.parentName:
|
||||
print "parenturl",urlData.parentName
|
||||
# NOTE(review): indentation is not visible in this view; presumably the
# line number is printed only together with the parent URL -- confirm
print "line",urlData.line
|
||||
if urlData.baseRef:
|
||||
print "baseurl",urlData.baseRef
|
||||
if urlData.infoString:
|
||||
print "info",urlData.infoString
|
||||
if urlData.warningString:
|
||||
print "warning",urlData.warningString
|
||||
# the result line: validString for valid URLs, errorString otherwise
if urlData.valid:
|
||||
print "valid",urlData.validString
|
||||
else:
|
||||
print "error",urlData.errorString
|
||||
|
||||
# nothing to finalize; the linknumber argument is not used
def endOfOutput(self, linknumber=-1):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ class RobotFileParser:
|
|||
return 1
|
||||
# search for given user agent matches
|
||||
# the first match counts
|
||||
url = urllib.quote(urlparse.urlparse(url)[2])
|
||||
url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
|
||||
for entry in self.entries:
|
||||
if entry.applies_to(useragent):
|
||||
return entry.allowance(url)
|
||||
|
|
@ -222,6 +222,8 @@ def _test():
|
|||
rp.parse(open(sys.argv[1]).readlines())
|
||||
# test for re.escape
|
||||
_check(rp.can_fetch('*', 'http://www.musi-cal.com/'), 1)
|
||||
# empty url path
|
||||
_check(rp.can_fetch('*', 'http://www.musi-cal.com'), 1)
|
||||
# this should match the first rule, which is a disallow
|
||||
_check(rp.can_fetch('', 'http://www.musi-cal.com/'), 0)
|
||||
# various cherry pickers
|
||||
|
|
|
|||
202
linkchecker
202
linkchecker
|
|
@ -27,102 +27,106 @@ import linkcheck
|
|||
from linkcheck import _,StringUtil
|
||||
|
||||
|
||||
Usage = _("USAGE\tlinkchecker [options] file-or-url...\n"
|
||||
"\n"
|
||||
"OPTIONS\n"
|
||||
"For single-letter option arguments the space is not a necessity. So\n"
|
||||
"'-o colored' is the same as '-ocolored'.\n"
|
||||
"-a, --anchors\n"
|
||||
" Check anchor references. Default is don't check anchors.\n"
|
||||
"-d, --denyallow\n"
|
||||
" Swap checking order to extern/intern. Default checking order\n"
|
||||
" is intern/extern.\n"
|
||||
"-D, --debug\n"
|
||||
" Print additional debugging information.\n"
|
||||
"-e regex, --extern=regex\n"
|
||||
" Assume urls that match the given expression as extern.\n"
|
||||
" Only intern HTML links are checked recursively.\n"
|
||||
"-f file, --config=file\n"
|
||||
" Use file as configuration file. LinkChecker first searches\n"
|
||||
" ~/.linkcheckerrc and then /etc/linkcheckerrc\n"
|
||||
" (under Windows <path-to-program>\\linkcheckerrc).\n"
|
||||
"-F type, --file-output=type\n"
|
||||
" Same as output, but write to a file linkchecker-out.<type>.\n"
|
||||
" If the file already exists, it is overwritten. You can specify\n"
|
||||
" this option more than once. There is no file output for the\n"
|
||||
" blacklist logger. Default is no file output.\n"
|
||||
"-i regex, --intern=regex\n"
|
||||
" Assume URLs that match the given expression as intern.\n"
|
||||
" LinkChecker descends recursively only to intern URLs, not to extern.\n"
|
||||
"-h, --help\n"
|
||||
" Help me! Print usage information for this program.\n"
|
||||
"-N server, --nntp-server=server\n"
|
||||
" Specify an NNTP server for 'news:...' links. Default is the\n"
|
||||
" environment variable NNTP_SERVER. If no host is given,\n"
|
||||
" only the syntax of the link is checked.\n"
|
||||
"-o type, --output=type\n"
|
||||
" Specify output type as %s.\n"
|
||||
" Default type is text.\n"
|
||||
"-p pwd, --password=pwd\n"
|
||||
" Try password pwd for HTML and FTP authorization.\n"
|
||||
" Default password is 'joe@'. See also -u.\n"
|
||||
"-q, --quiet\n"
|
||||
" Quiet operation. This is only useful with -F.\n"
|
||||
"-r depth, --recursion-level=depth\n"
|
||||
" Check recursively all links up to given depth (depth >= 0).\n"
|
||||
" Default depth is 1.\n"
|
||||
"-R, --robots-txt\n"
|
||||
" Obey the robots exclusion standard.\n"
|
||||
"-s, --strict\n"
|
||||
" Check only syntax of extern links, do not try to connect to them.\n"
|
||||
"-t num, --threads=num\n"
|
||||
" Generate no more than num threads. Default number of threads is 5.\n"
|
||||
" To disable threading specify a non-positive number.\n"
|
||||
"-u name, --user=name\n"
|
||||
" Try username name for HTML and FTP authorization.\n"
|
||||
" Default is 'anonymous'. See also -p.\n"
|
||||
"-V, --version\n"
|
||||
" Print version and exit.\n"
|
||||
"-v, --verbose\n"
|
||||
" Log all checked URLs (implies -w). Default is to log only invalid\n"
|
||||
" URLs.\n"
|
||||
"-w, --warnings\n"
|
||||
" Log warnings.\n"
|
||||
"-W regex, --warning-regex=regex\n"
|
||||
" Define a regular expression which prints a warning if it matches\n"
|
||||
" any content of the checked link.\n"
|
||||
" This applies of course only to pages which are valid, so we can\n"
|
||||
" get their content.\n"
|
||||
" Use this to check for pages that contain some form of error\n"
|
||||
" message, for example 'This page has moved' or 'Oracle\n"
|
||||
" Application Server error'.\n"
|
||||
" This option implies -w.\n") % linkcheck.Config.LoggerKeys
|
||||
Usage = _("""USAGE\tlinkchecker [options] file-or-url...
|
||||
|
||||
Notes = _("NOTES\n"
|
||||
"o LinkChecker assumes an http:// resp. ftp:// link when a commandline URL\n"
|
||||
" starts with 'www.' resp. 'ftp.'\n"
|
||||
" You can also give local files as arguments.\n"
|
||||
"o If you have your system configured to automatically establish a\n"
|
||||
" connection to the internet (e.g. with diald), it will connect when\n"
|
||||
" checking links not pointing to your local host.\n"
|
||||
" Use the -s and -i options to prevent this.\n"
|
||||
"o Javascript links are currently ignored.\n"
|
||||
"o If your platform does not support threading, LinkChecker uses -t0.\n"
|
||||
"o You can supply multiple user/password pairs in a configuration file.\n"
|
||||
"o Cookies are not accepted by LinkChecker.\n"
|
||||
"o To use proxies set $http_proxy, $https_proxy on Unix or Windows.\n"
|
||||
" On a Mac use the Internet Config.\n"
|
||||
"o When checking 'news:' links the given NNTP host doesn't need to be the\n"
|
||||
" same as the host of the user browsing your pages!\n")
|
||||
OPTIONS
|
||||
For single-letter option arguments the space is not a necessity. So
|
||||
'-o colored' is the same as '-ocolored'.
|
||||
-a, --anchors
|
||||
Check anchor references. Default is don't check anchors.
|
||||
-d, --denyallow
|
||||
Swap checking order to extern/intern. Default checking order
|
||||
is intern/extern.
|
||||
-D, --debug
|
||||
Print additional debugging information.
|
||||
-e regex, --extern=regex
|
||||
Assume urls that match the given expression as extern.
|
||||
Only intern HTML links are checked recursively.
|
||||
-f file, --config=file
|
||||
Use file as configuration file. LinkChecker first searches
|
||||
~/.linkcheckerrc and then /etc/linkcheckerrc
|
||||
(under Windows <path-to-program>\\linkcheckerrc).
|
||||
-F type[/filename], --file-output=type[/filename]
|
||||
Same as output, but write to a file linkchecker-out.<type>
|
||||
or <filename> if specified. If the file already exists, it
|
||||
is overwritten. You can specify this option more than once.
|
||||
There is no file output for the blacklist logger. Default is
|
||||
no file output.
|
||||
-i regex, --intern=regex
|
||||
Assume URLs that match the given expression as intern.
|
||||
LinkChecker descends recursively only to intern URLs, not to extern.
|
||||
-h, --help
|
||||
Help me! Print usage information for this program.
|
||||
-N server, --nntp-server=server
|
||||
Specify an NNTP server for 'news:...' links. Default is the
|
||||
environment variable NNTP_SERVER. If no host is given,
|
||||
only the syntax of the link is checked.
|
||||
-o type, --output=type
|
||||
Specify output type as %s.
|
||||
Default type is text.
|
||||
-p pwd, --password=pwd
|
||||
Try password pwd for HTML and FTP authorization.
|
||||
Default password is 'joe@'. See also -u.
|
||||
-q, --quiet
|
||||
Quiet operation. This is only useful with -F.
|
||||
-r depth, --recursion-level=depth
|
||||
Check recursively all links up to given depth (depth >= 0).
|
||||
Default depth is 1.
|
||||
-R, --robots-txt
|
||||
Obey the robots exclusion standard.
|
||||
-s, --strict
|
||||
Check only syntax of extern links, do not try to connect to them.
|
||||
-t num, --threads=num
|
||||
Generate no more than num threads. Default number of threads is 5.
|
||||
To disable threading specify a non-positive number.
|
||||
-u name, --user=name
|
||||
Try username name for HTML and FTP authorization.
|
||||
Default is 'anonymous'. See also -p.
|
||||
-V, --version
|
||||
Print version and exit.
|
||||
-v, --verbose
|
||||
Log all checked URLs (implies -w). Default is to log only invalid
|
||||
URLs.
|
||||
-w, --warnings
|
||||
Log warnings.
|
||||
-W regex, --warning-regex=regex
|
||||
Define a regular expression which prints a warning if it matches
|
||||
any content of the checked link.
|
||||
This applies of course only to pages which are valid, so we can
|
||||
get their content.
|
||||
Use this to check for pages that contain some form of error
|
||||
message, for example 'This page has moved' or 'Oracle
|
||||
Application Server error'.
|
||||
This option implies -w.\n") % linkcheck.Config.LoggerKeys
|
||||
"""
|
||||
|
||||
Examples = _("EXAMPLES\n"
|
||||
"o linkchecker -v -ohtml -r2 -s -itreasure.calvinsplayground.de \\\n"
|
||||
" http://treasure.calvinsplayground.de/~calvin/ > sample.html\n"
|
||||
"o Local files and syntactic sugar on the command line:\n"
|
||||
" linkchecker c:\\temp\\test.html\n"
|
||||
" linkchecker ../bla.html\n"
|
||||
" linkchecker www.myhomepage.de\n"
|
||||
" linkchecker -r0 ftp.linux.org\n")
|
||||
Notes = _("""NOTES
|
||||
o LinkChecker assumes an http:// resp. ftp:// link when a commandline URL
|
||||
starts with 'www.' resp. 'ftp.'
|
||||
You can also give local files as arguments.
|
||||
o If you have your system configured to automatically establish a
|
||||
connection to the internet (e.g. with diald), it will connect when
|
||||
checking links not pointing to your local host.
|
||||
Use the -s and -i options to prevent this.
|
||||
o Javascript links are currently ignored.
|
||||
o If your platform does not support threading, LinkChecker uses -t0.
|
||||
o You can supply multiple user/password pairs in a configuration file.
|
||||
o Cookies are not accepted by LinkChecker.
|
||||
o To use proxies set $http_proxy, $https_proxy on Unix or Windows.
|
||||
On a Mac use the Internet Config.
|
||||
o When checking 'news:' links the given NNTP host doesn't need to be the
|
||||
same as the host of the user browsing your pages!
|
||||
""")
|
||||
|
||||
Examples = _("""EXAMPLES
|
||||
o linkchecker -v -ohtml -r2 -s -itreasure.calvinsplayground.de \\
|
||||
http://treasure.calvinsplayground.de/~calvin/ > sample.html
|
||||
o Local files and syntactic sugar on the command line:
|
||||
linkchecker c:\\temp\\test.html
|
||||
linkchecker ../bla.html
|
||||
linkchecker www.myhomepage.de
|
||||
linkchecker -r0 ftp.linux.org
|
||||
""")
|
||||
|
||||
def printVersion():
|
||||
print linkcheck.Config.AppInfo
|
||||
|
|
@ -207,9 +211,13 @@ for opt,arg in options:
|
|||
"'-o, --output'")
|
||||
|
||||
elif opt=="-F" or opt=="--file-output":
|
||||
if linkcheck.Config.Loggers.has_key(arg) and arg != "blacklist":
|
||||
config['fileoutput'].append(
|
||||
config.newLogger(arg, {'fileoutput':1}))
|
||||
ns = {'fileoutput':1}
|
||||
try:
|
||||
type, ns['filename'] = string.split(arg, '/', 1)
|
||||
if not ns['filename']: raise ValueError
|
||||
except ValueError: type = arg
|
||||
if linkcheck.Config.Loggers.has_key(type) and type != "blacklist":
|
||||
config['fileoutput'].append(config.newLogger(type, ns))
|
||||
else:
|
||||
printUsage((_("Illegal argument '%s' for option ") % arg) +\
|
||||
"'-F, --file-output'")
|
||||
|
|
|
|||
|
|
@ -29,84 +29,85 @@ a (Fast)CGI web interface (requires HTTP server)
|
|||
For single-letter option arguments the space is not a necessity.
|
||||
So \fI-o colored\fP is the same as \fI-ocolored\fP.
|
||||
.TP
|
||||
\fB-a, --anchors\fP
|
||||
\fB-a\fP, \fB--anchors\fP
|
||||
Check anchor references. Default is don't check anchors.
|
||||
.TP
|
||||
\fB-d, --denyallow\fP
|
||||
\fB-d\fP, \fB--denyallow\fP
|
||||
Swap checking order to extern/intern. Default checking order is
|
||||
intern/extern.
|
||||
.TP
|
||||
\fB-D, --debug\fP
|
||||
\fB-D\fP, \fB--debug\fP
|
||||
Print debugging information.
|
||||
.TP
|
||||
\fB-e \fIregex\fB, --extern=\fIregex\fP
|
||||
\fB-e \fIregex\fP, \fB--extern=\fIregex\fP
|
||||
Assume URLs that match the given regular expression as extern.
|
||||
Only intern HTML links are checked recursively.
|
||||
.TP
|
||||
\fB-f \fIfile\fB, --config=\fIfile\fP
|
||||
\fB-f \fIfile\fP, \fB--config=\fIfile\fP
|
||||
Use \fIfile\fP as configuration file. LinkChecker first searches for
|
||||
~/.linkcheckerrc and then /etc/linkcheckerrc on Unix systems.
|
||||
Under Windows systems we read <path-to-program>\\linkcheckerrc.
|
||||
.TP
|
||||
\fB-F \fItype\fB, --file-output=\fItype\fP
|
||||
Same as output, but write to a file \fIlinkchecker-out.<type>\fP.
|
||||
If the file already exists, it is overwritten. You can specify this
|
||||
option more than once. There is no file output for the blacklist
|
||||
logger. Default is no file output.
|
||||
\fB-F \fItype\fP[\fI/filename\fP], \fB--file-output=\fItype\fP[\fI/filename\fP]
|
||||
Same as output, but write to a file \fIlinkchecker-out.<type>\fP
|
||||
or \fIfilename\fP if specified. If the file already exists, it is
|
||||
overwritten. You can specify this option more than once. There
|
||||
is no file output for the blacklist logger. Default is no file
|
||||
output.
|
||||
.TP
|
||||
\fB-i \fIregex\fB, --intern=\fIregex\fP
|
||||
\fB-i \fIregex\fP, \fB--intern=\fIregex\fP
|
||||
Assume URLs that match the given regular expression as intern.
|
||||
LinkChecker descends recursively only to intern URLs, not to extern.
|
||||
.TP
|
||||
\fB-h, --help\fP
|
||||
\fB-h\fP, \fB--help\fP
|
||||
Help me! Print usage information for this program.
|
||||
.TP
|
||||
\fB-N \fIserver\fB, --nntp-server=\fIserver\fP
|
||||
\fB-N \fIserver\fP, \fB--nntp-server=\fIserver\fP
|
||||
Specify an NNTP server for 'news:...' links. Default is the
|
||||
environment variable NNTP_SERVER. If no host is given,
|
||||
only the syntax of the link is checked.
|
||||
.TP
|
||||
\fB-o \fItype\fB, --output=\fItype\fP
|
||||
\fB-o \fItype\fP, \fB--output=\fItype\fP
|
||||
Specify output type as \fItext\fP, \fIcolored\fP, \fIhtml\fP, \fIsql\fP,
|
||||
\fIcsv\fP, \fIgml\fP, \fIxml\fP or \fIblacklist\fP.
|
||||
Default type is \fItext\fP.
|
||||
.TP
|
||||
\fB-p \fIpwd\fB, --password=\fIpwd\fP
|
||||
\fB-p \fIpwd\fP, \fB--password=\fIpwd\fP
|
||||
Try the password \fIpwd\fP for HTML and FTP authorization.
|
||||
The default password is \fIjoe@\fP. See also \fB-u\fP.
|
||||
.TP
|
||||
\fB-q, --quiet\fP
|
||||
\fB-q\fP, \fB--quiet\fP
|
||||
Quiet operation. This is only useful with \fB-F\fP.
|
||||
.TP
|
||||
\fB-r \fIdepth\fB, --recursion-level=\fIdepth\fP
|
||||
\fB-r \fIdepth\fP, \fB--recursion-level=\fIdepth\fP
|
||||
Check recursively all links up to given \fIdepth\fP (depth >= 0).
|
||||
Default depth is 1.
|
||||
.TP
|
||||
\fB-R, --robots-txt\fP
|
||||
\fB-R\fP, \fB--robots-txt\fP
|
||||
Obey the robots exclusion standard.
|
||||
.TP
|
||||
\fB-s, --strict\fP
|
||||
\fB-s\fP, \fB--strict\fP
|
||||
Check only the syntax of extern links, do not try to connect to them.
|
||||
.TP
|
||||
\fB-t \fInum\fB, --threads=\fInum\fP
|
||||
\fB-t \fInum\fP, \fB--threads=\fInum\fP
|
||||
Generate no more than \fInum\fP threads. Default number of threads is 5.
|
||||
To disable threading specify a non-positive number.
|
||||
.TP
|
||||
\fB-u \fIname\fB, --user=\fIname\fP
|
||||
\fB-u \fIname\fP, \fB--user=\fIname\fP
|
||||
Try username \fIname\fP for HTML and FTP authorization.
|
||||
Default is \fIanonymous\fP. See also \fB-p\fP.
|
||||
.TP
|
||||
\fB-V, --version\fP
|
||||
\fB-V\fP, \fB--version\fP
|
||||
Print version and exit.
|
||||
.TP
|
||||
\fB-v, --verbose\fP
|
||||
\fB-v\fP, \fB--verbose\fP
|
||||
Log all checked URLs (implies \fB-w\fP). Default is to log only invalid
|
||||
URLs.
|
||||
.TP
|
||||
\fB-w, --warnings\fP
|
||||
\fB-w\fP, \fB--warnings\fP
|
||||
Log warnings.
|
||||
.TP
|
||||
\fB-W \fIregex\fB, --warning-regex=\fIregex\fP
|
||||
\fB-W \fIregex\fP, \fB--warning-regex=\fIregex\fP
|
||||
Define a regular expression which prints a warning if it matches any
|
||||
content of the checked link.
|
||||
This applies of course only to pages which are valid, so we can get
|
||||
|
|
|
|||
|
|
@ -1,91 +1,98 @@
|
|||
# sample resource file
|
||||
# to modify, just uncomment the line
|
||||
# sample resource file with default values
|
||||
# see linkchecker -h for help on these options
|
||||
# commandline options override these settings!
|
||||
|
||||
[output]
|
||||
# turn on/off debug messages
|
||||
#debug=0
|
||||
debug=0
|
||||
# use the color logger
|
||||
#log=colored
|
||||
log=text
|
||||
# turn on/off --verbose
|
||||
#verbose=0
|
||||
verbose=0
|
||||
# turn on/off --warnings
|
||||
#warnings=0
|
||||
warnings=0
|
||||
# turn on/off --quiet
|
||||
#quiet=0
|
||||
quiet=0
|
||||
# additional file output
|
||||
#fileoutput = text colored html gml sql
|
||||
fileoutput=
|
||||
#fileoutput = text, colored, html, gml, sql XXX
|
||||
# what fields should each logger print out?
|
||||
fields = all
|
||||
# field = url, parent url, base url
|
||||
# fields names: XXX
|
||||
# url
|
||||
# parent url
|
||||
# base url
|
||||
|
||||
# each Logger can have separate configuration parameters
|
||||
# standard text logger
|
||||
[text]
|
||||
#filename=linkchecker-out.txt
|
||||
filename=linkchecker-out.txt
|
||||
|
||||
# GML logger
|
||||
[gml]
|
||||
#filename=linkchecker-out.gml
|
||||
filename=linkchecker-out.gml
|
||||
|
||||
# CSV logger
|
||||
[csv]
|
||||
#filename=linkchecker-out.csv
|
||||
#separator=;
|
||||
filename=linkchecker-out.csv
|
||||
separator=;
|
||||
|
||||
# SQL logger
|
||||
[sql]
|
||||
#filename=linkchecker-out.sql
|
||||
#dbname=linksdb
|
||||
#commandsep=;
|
||||
filename=linkchecker-out.sql
|
||||
dbname=linksdb
|
||||
commandsep=;
|
||||
|
||||
# HTML logger
|
||||
[html]
|
||||
#filename=linkchecker-out.html
|
||||
filename=linkchecker-out.html
|
||||
# colors for the various parts
|
||||
#colorbackground="#ffffff"
|
||||
#colorurl=blue
|
||||
#colorborder=
|
||||
#colorlink=
|
||||
#tablewarning=
|
||||
#tableok=
|
||||
#tableerror=
|
||||
colorbackground="#fff7e5"
|
||||
colorurl="#dcd5cf"
|
||||
colorborder="#000000"
|
||||
colorlink="#191c83"
|
||||
tablewarning=<td bgcolor="#e0954e">
|
||||
tableerror=<td bgcolor="#db4930">
|
||||
tableok=<td bgcolor="#3ba557">
|
||||
|
||||
# ANSI color logger
|
||||
[colored]
|
||||
#filename=linkchecker-out.ansi
|
||||
# colors for the various parts
|
||||
#colorparent=
|
||||
#colorurl=
|
||||
#colorname=
|
||||
#colorreal=
|
||||
#colorbase=
|
||||
#colorvalid=
|
||||
#colorinvalid=
|
||||
#colorinfo=
|
||||
#colorwarning=
|
||||
#colordltime=
|
||||
#colorreset=
|
||||
filename=linkchecker-out.ansi
|
||||
# colors for the various parts (\x1b = ESC)
|
||||
colorparent="\x1b[37m"
|
||||
colorurl="\x1b[0m"
|
||||
colorname="\x1b[0m"
|
||||
colorreal="\x1b[36m"
|
||||
colorbase="\x1b[35m"
|
||||
colorvalid="\x1b[1;32m"
|
||||
colorinvalid="\x1b[1;31m"
|
||||
colorinfo="\x1b[0m"
|
||||
colorwarning="\x1b[1;33m"
|
||||
colordltime="\x1b[0m"
|
||||
colorreset="\x1b[0m"
|
||||
|
||||
# blacklist logger
|
||||
[blacklist]
|
||||
#filename=~/.blacklist
|
||||
filename=~/.blacklist
|
||||
|
||||
# checking options
|
||||
[checking]
|
||||
# number of threads
|
||||
#threads=5
|
||||
threads=5
|
||||
# check anchors?
|
||||
#anchors=0
|
||||
#recursionlevel=1
|
||||
anchors=0
|
||||
recursionlevel=1
|
||||
# obey robots.txt exclusion?
|
||||
#robotstxt=0
|
||||
robotstxt=1
|
||||
# overall strict checking. You can specify for each extern URL
|
||||
# separately if it's strict or not. See the [filtering] section
|
||||
#strict=0
|
||||
strict=0
|
||||
# supply a regular expression for which warnings are printed if found
|
||||
# in any HTML files.
|
||||
#warningregex="Request failed"
|
||||
warningregex=
|
||||
# Basic NNTP server. Overrides NNTP_SERVER environment variable.
|
||||
#nntpserver=news.uni-stuttgart.de
|
||||
nntpserver=
|
||||
|
||||
# filtering options (see FAQ)
|
||||
# for each extern link we can specify if it is strict or not
|
||||
|
|
@ -98,7 +105,7 @@
|
|||
# internlinks=calvinsplayground\.de
|
||||
# check only syntax of all mail addresses
|
||||
# extern3=^mailto: 1
|
||||
#denyallow=0
|
||||
denyallow=0
|
||||
|
||||
# You can provide different user/password pairs for different link types.
|
||||
# Entries are a triple (link regular expression, username, password),
|
||||
|
|
|
|||
|
|
@ -1,3 +1,2 @@
|
|||
*.result
|
||||
*.prof
|
||||
*.pyc
|
||||
|
|
|
|||
1
test/__init__.py
Normal file
1
test/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
# Dummy file to make this directory a package.
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
<!-- base without href -->
|
||||
<base target="_top">
|
||||
<!-- meta url -->
|
||||
<META HTTP-equiv="refresh" content="0; url=http://www.calvinandhobbes.com/">
|
||||
<!-- spaces between key and value -->
|
||||
<a href
|
||||
=
|
||||
"file:/etc">
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
<!-- base with href -->
|
||||
<base href="file:/etc/">
|
||||
<!-- good file -->
|
||||
<a href="passwd">
|
||||
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
<!-- frame src urls -->
|
||||
<frameset border="0" frameborder="0" framespacing="0">
|
||||
<frame name="top" src="test1.html" frameborder="0">
|
||||
<frame name="bottom" src="test2.html" frameborder="0">
|
||||
</frameset>
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
<!-- extra mail checking -->
|
||||
<html><head></head>
|
||||
<body>
|
||||
<!-- legal -->
|
||||
<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>
|
||||
<a href="mailto:Dude <calvin@studcs.uni-sb.de> , Killer <calvin@cs.uni-sb.de>?subject=bla">2</a>
|
||||
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>?bcc=jsmith%40company.com">3</a>
|
||||
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>">4</a>
|
||||
<a href="mailto:">6</a>
|
||||
<a href="mailto:o'hara@cs.uni-sb.de">5</a>
|
||||
<a href="mailto:?to=calvin@studcs.uni-sb.de?subject=blubb">...</a>
|
||||
<a href="mailto:jan@jan-dittberner.de?subject=test">...</a>
|
||||
<!-- illegal -->
|
||||
<!-- contains non-quoted characters -->
|
||||
<a href="mailto:a@d?subject=äöü">5</a>
|
||||
<a href="mailto:calvin@cs.uni-sb.de?subject=Halli hallo">_</a>
|
||||
<!-- ? extension forbidden in <> construct -->
|
||||
<a href="mailto:Bastian Kleineidam <calvin@host1?foo=bar>">3</a>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
<!-- news testing -->
|
||||
<a href="news:comp.os.linux.misc">
|
||||
<!-- snews -->
|
||||
<a href="snews:de.comp.os.unix.linux.misc">
|
||||
<!-- no group -->
|
||||
<a href="news:">
|
||||
<!-- illegal syntax -->
|
||||
<a href="news:§$%&/´`(§%">
|
||||
<!-- nntp scheme with host -->
|
||||
<a href="nntp://news.rz.uni-sb.de/comp.lang.python">
|
||||
<!-- article span -->
|
||||
<a href="nntp://news.rz.uni-sb.de/comp.lang.python/1-5">
|
||||
<!-- article number -->
|
||||
<a href="nntp://news.rz.uni-sb.de/EFGJG4.7A@deshaw.com">
|
||||
<!-- host but no group -->
|
||||
<a href="nntp://news.rz.uni-sb.de/">
|
||||
<!-- article span -->
|
||||
<a href="news:comp.lang.python/1-5">
|
||||
|
||||
10
test/output/test_base
Normal file
10
test/output/test_base
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
test_base
|
||||
url file:///home/calvin/projects/linkchecker/test/output/base1.html
|
||||
realurl file:/home/calvin/projects/linkchecker/test/output/base1.html
|
||||
error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base1.html'
|
||||
url file:///home/calvin/projects/linkchecker/test/output/base2.html
|
||||
realurl file:/home/calvin/projects/linkchecker/test/output/base2.html
|
||||
error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base2.html'
|
||||
url file:///home/calvin/projects/linkchecker/test/output/base3.html
|
||||
realurl file:/home/calvin/projects/linkchecker/test/output/base3.html
|
||||
error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base3.html'
|
||||
248
test/regrtest.py
Executable file
248
test/regrtest.py
Executable file
|
|
@ -0,0 +1,248 @@
|
|||
#!/usr/bin/env python2
|
||||
|
||||
# this file is _not_ the original Python2 regression test suite.
|
||||
|
||||
"""Bastis Regression test.
|
||||
|
||||
This will find all modules whose name is "test_*" in the test
|
||||
directory, and run them. Various command line options provide
|
||||
additional facilities.
|
||||
|
||||
Command line options:
|
||||
|
||||
-v, --verbose
|
||||
run tests in verbose mode with output to stdout
|
||||
-q, --quiet
|
||||
don't print anything except if a test fails
|
||||
-g, --generate
|
||||
write the output file for a test instead of comparing it
|
||||
-x, --exclude
|
||||
arguments are tests to *exclude*
|
||||
-r, --random
|
||||
randomize test execution order
|
||||
|
||||
If non-option arguments are present, they are names for tests to run,
|
||||
unless -x is given, in which case they are names for tests not to run.
|
||||
If no test names are given, all tests are run.
|
||||
|
||||
-v is incompatible with -g and does not compare test output files.
|
||||
"""
|
||||
|
||||
import sys,getopt,os,string
|
||||
|
||||
import test_support
|
||||
|
||||
def main(tests=None, testdir=None, verbose=0, quiet=0, generate=0,
|
||||
exclude=0, randomize=0):
|
||||
"""Execute a test suite.
|
||||
|
||||
This also parses command-line options and modifies its behavior
|
||||
accordingly.
|
||||
|
||||
tests -- a list of strings containing test names (optional)
|
||||
testdir -- the directory in which to look for tests (optional)
|
||||
|
||||
Users other than the Python test suite will certainly want to
|
||||
specify testdir; if it's omitted, the directory containing the
|
||||
Python test suite is searched for.
|
||||
|
||||
If the tests argument is omitted, the tests listed on the
|
||||
command-line will be used. If that's empty, too, then all *.py
|
||||
files beginning with test_ will be used.
|
||||
|
||||
The other seven default arguments (verbose, quiet, generate, exclude,
|
||||
single, randomize, and findleaks) allow programmers calling main()
|
||||
directly to set the values that would normally be set by flags on the
|
||||
command line.
|
||||
|
||||
"""
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:],
|
||||
'vgqxsrl',
|
||||
['verbose',
|
||||
'generate',
|
||||
'quiet',
|
||||
'exclude',
|
||||
'random',])
|
||||
except getopt.error, msg:
|
||||
error(msg)
|
||||
usage()
|
||||
return -1
|
||||
for opt, val in opts:
|
||||
if opt in ('-v','--verbose'): verbose = verbose + 1
|
||||
if opt in ('-q','--quiet'): quiet = 1; verbose = 0
|
||||
if opt in ('-g','--generate'): generate = 1
|
||||
if opt in ('-x','--exclude'): exclude = 1
|
||||
if opt in ('-r','--random'): randomize = 1
|
||||
if generate and verbose:
|
||||
print "-g and -v don't go together!"
|
||||
return 2
|
||||
good = []
|
||||
bad = []
|
||||
skipped = []
|
||||
|
||||
for i in range(len(args)):
|
||||
# Strip trailing ".py" from arguments
|
||||
if args[i][-3:] == '.py':
|
||||
args[i] = args[i][:-3]
|
||||
stdtests = STDTESTS[:]
|
||||
nottests = NOTTESTS[:]
|
||||
if exclude:
|
||||
for arg in args:
|
||||
if arg in stdtests:
|
||||
stdtests.remove(arg)
|
||||
nottests[:0] = args
|
||||
args = []
|
||||
tests = tests or args or findtests(testdir, stdtests, nottests)
|
||||
if randomize:
|
||||
random.shuffle(tests)
|
||||
test_support.verbose = verbose # Tell tests to be moderately quiet
|
||||
save_modules = sys.modules.keys()
|
||||
for test in tests:
|
||||
if not quiet:
|
||||
print test
|
||||
ok = runtest(test, generate, verbose, quiet, testdir)
|
||||
if ok > 0:
|
||||
good.append(test)
|
||||
elif ok == 0:
|
||||
bad.append(test)
|
||||
else:
|
||||
skipped.append(test)
|
||||
# Unload the newly imported modules (best effort finalization)
|
||||
for module in sys.modules.keys():
|
||||
if module not in save_modules and module.startswith("test."):
|
||||
test_support.unload(module)
|
||||
if good and not quiet:
|
||||
if not bad and not skipped and len(good) > 1:
|
||||
print "All",
|
||||
print count(len(good), "test"), "OK."
|
||||
if bad:
|
||||
print count(len(bad), "test"), "failed:",
|
||||
print string.join(bad)
|
||||
if skipped and not quiet:
|
||||
print count(len(skipped), "test"), "skipped:",
|
||||
print string.join(skipped)
|
||||
|
||||
return len(bad) > 0
|
||||
|
||||
# Tests that findtests() returns first, in exactly this order, ahead of
# the alphabetically sorted remainder.
STDTESTS = [
    'test_base',
#   'test_frames',
    ]

# Modules whose file name matches test_*.py but which findtests() must
# not return as tests.
NOTTESTS = [
    'test_support',
    ]
|
||||
|
||||
def findtests(testdir=None, stdtests=STDTESTS, nottests=NOTTESTS):
    """Return the names of all applicable test modules.

    testdir -- directory to scan; findtestdir() is used when it is empty
    stdtests -- names placed first in the result, in the given order
    nottests -- names that are never returned

    Every "test_*.py" file in testdir contributes its module name unless
    that name is in stdtests or nottests.  The result is stdtests
    followed by the remaining names in sorted order.
    """
    if not testdir:
        testdir = findtestdir()
    extra = []
    for filename in os.listdir(testdir):
        if filename[:5] != "test_" or filename[-3:] != ".py":
            continue
        module = filename[:-3]
        if module in stdtests or module in nottests:
            continue
        extra.append(module)
    extra.sort()
    return stdtests + extra
|
||||
|
||||
def runtest(test, generate, verbose, quiet, testdir = None):
|
||||
"""Run a single test.
|
||||
test -- the name of the test
|
||||
generate -- if true, generate output, instead of running the test
|
||||
and comparing it to a previously created output file
|
||||
verbose -- if true, print more messages
|
||||
quiet -- if true, don't print 'skipped' messages (probably redundant)
|
||||
testdir -- test directory
|
||||
"""
|
||||
test_support.unload(test)
|
||||
if not testdir: testdir = findtestdir()
|
||||
outputdir = os.path.join(testdir, "output")
|
||||
outputfile = os.path.join(outputdir, test)
|
||||
try:
|
||||
if generate:
|
||||
cfp = open(outputfile, "w")
|
||||
elif verbose:
|
||||
cfp = sys.stdout
|
||||
else:
|
||||
cfp = Compare(outputfile)
|
||||
except IOError:
|
||||
cfp = None
|
||||
print "Warning: can't open", outputfile
|
||||
try:
|
||||
save_stdout = sys.stdout
|
||||
try:
|
||||
if cfp:
|
||||
sys.stdout = cfp
|
||||
print test # Output file starts with test name
|
||||
__import__(test, globals(), locals(), [])
|
||||
if cfp and not (generate or verbose):
|
||||
cfp.close()
|
||||
finally:
|
||||
sys.stdout = save_stdout
|
||||
except (ImportError, test_support.TestSkipped), msg:
|
||||
if not quiet:
|
||||
print "test", test,
|
||||
print "skipped -- ", msg
|
||||
return -1
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except test_support.TestFailed, msg:
|
||||
print "test", test, "failed --", msg
|
||||
return 0
|
||||
except:
|
||||
type, value = sys.exc_info()[:2]
|
||||
print "test", test, "crashed --", str(type) + ":", value
|
||||
if verbose:
|
||||
traceback.print_exc(file=sys.stdout)
|
||||
return 0
|
||||
else:
|
||||
return 1
|
||||
|
||||
|
||||
def findtestdir():
    """Return the directory that contains this file.

    When run as a script the path comes from sys.argv[0], otherwise from
    the module's __file__.  If that path has no directory part,
    os.curdir is returned.
    """
    if __name__ != '__main__':
        location = __file__
    else:
        location = sys.argv[0]
    return os.path.dirname(location) or os.curdir
|
||||
|
||||
|
||||
def count(n, word):
    """Return "<n> <word>", with an "s" appended unless n equals 1."""
    template = "%d %ss"
    if n == 1:
        template = "%d %s"
    return template % (n, word)
|
||||
|
||||
|
||||
class Compare:
    """Write-only file-like object that checks output against a file.

    runtest() installs an instance as sys.stdout.  Everything written
    is compared with the next bytes of the expected-output file;
    test_support.TestFailed is raised on the first difference.
    """

    def __init__(self, filename):
        """Open the expected-output file; IOError propagates to the caller."""
        self.fp = open(filename, 'r')

    def write(self, data):
        """Compare data with the next len(data) characters of the file."""
        expected = self.fp.read(len(data))
        if data != expected:
            raise test_support.TestFailed(
                'Writing: '+repr(data)+', expected: '+repr(expected))

    def writelines(self, listoflines):
        """Check every string of listoflines in order via write()."""
        for line in listoflines:
            self.write(line)

    def flush(self):
        """Do nothing; there is nothing buffered."""
        pass

    def close(self):
        """Close the file; raise TestFailed if expected output is unread."""
        try:
            leftover = self.fp.read()
        finally:
            # Always release the file, including when reporting a failure.
            self.fp.close()
        if leftover:
            raise test_support.TestFailed('Unread: '+repr(leftover))

    def isatty(self):
        """Return 0: this object is never a terminal."""
        return 0
|
||||
|
||||
# Script entry point: main()'s return value becomes the exit status.
if __name__ == '__main__':
    sys.exit(main())
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
Just some HTTP links
|
||||
<a href="http://www.garantiertnixgutt.bla">bad url</a>
|
||||
<a href="http://www.heise.de">ok</a>
|
||||
<a href="http:/www.heise.de">one slash</a>
|
||||
<a href="http:www.heise.de">no slash</a>
|
||||
<a href="http://">no url</a>
|
||||
<a href="http:/">no url, one slash</a>
|
||||
<a href="http:">no url, no slash</a>
|
||||
<a href="http://www.blubb.de/stalter&sohn">unquoted ampersand</a>
|
||||
<a name="iswas">anchor for test2.html</a>
|
||||
<a href=http://slashdot.org/>unquoted</a>
|
||||
<a href="http://treasure.calvinsplayground.de/~calvin/software/#isnix"
|
||||
>invalid anchor</a>
|
||||
<a href="http://treasure.calvinsplayground.de/~calvin/isnich/"
|
||||
>authorization (user=calvin, pass=calvin)</a>
|
||||
<a href="https://www.heise.de">https</a>
|
||||
<a href="HtTP://WWW.hEIsE.DE">should be cached</a>
|
||||
<a href="HTTP://WWW.HEISE.DE">should be cached</a>
|
||||
<!-- <a href=http://nocheckin> no check because of comment -->
|
||||
<a href=illegalquote1">no beginning quote</a>
|
||||
<a href="illegalquote2>no ending quote</a>
|
||||
<!-- check the parser at end of file -->
|
||||
<a href="g
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
<!-- meta url -->
|
||||
<meta http-equiv="refresh" content="5; url=http://localhost">
|
||||
<a href="hutzli:nixgutt"> <!-- bad scheme -->
|
||||
<a href="javascript:loadthis()"> <!-- javascript (ignore) -->
|
||||
<a href="file:///etc/group"> <!-- good file -->
|
||||
<a href="file://etc/group"> <!-- bad file -->
|
||||
<a href="file:/etc/group"> <!-- good file -->
|
||||
<a href="file:etc/group"> <!-- bad file -->
|
||||
<a href="file:/etc/"> <!-- good dir -->
|
||||
<a href="test1.html"> <!-- relative url -->
|
||||
<a href="test1.html#isnix"> <!-- bad anchor -->
|
||||
<a href="test1.html#iswas"> <!-- good anchor -->
|
||||
<a href="telnet:localhost"> <!-- telnet to localhost -->
|
||||
<a href="telnet:"> <!-- telnet without host -->
|
||||
<a href="ftp:/treasure.calvinsplayground.de/pub"> <!-- ftp one slash -->
|
||||
<a href="ftp://treasure.calvinsplayground.de/pub"> <!-- ftp two slashes -->
|
||||
<a href="ftp://treasure.calvinsplayground.de//pub"> <!-- ftp two dir slashes -->
|
||||
<a href="ftp://treasure.calvinsplayground.de////////pub"> <!-- ftp many dir slashes -->
|
||||
<a href="ftp:///treasure.calvinsplayground.de/pub"> <!-- ftp three slashes -->
|
||||
14
test/test_base.py
Normal file
14
test/test_base.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Regression test: runs the link checker over the three base*.html pages.
import os,sys
# Add the current working directory to the import path so that
# "import linkcheck" works.
sys.path.append(os.getcwd())
import linkcheck
config = linkcheck.Config.Configuration()
config['recursionlevel'] = 1
# NOTE(review): presumably the 'test' logger emits the url/realurl/error
# lines stored in test/output/test_base -- confirm.
config['log'] = config.newLogger('test')
config["anchors"] = 1
config["verbose"] = 1
# NOTE(review): threading is presumably disabled to keep the output
# order reproducible -- confirm.
config.disableThreading()
# Relative path: resolved against the current working directory.
htmldir = "test/html"
for file in ('base1.html','base2.html','base3.html'):
    url = os.path.join(htmldir, file)
    config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0))
linkcheck.checkUrls(config)
|
||||
72
test/test_support.py
Normal file
72
test/test_support.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""Supporting definitions for the Python regression test."""
|
||||
|
||||
|
||||
class Error(Exception):
    """Base class for regression test exceptions.

    TestFailed and TestSkipped derive from it.
    """
|
||||
|
||||
class TestFailed(Error):
    """Test failed.

    regrtest.py raises this from its Compare class when the output of a
    test differs from the expected output, and reports the test as
    failed when it catches it.
    """
|
||||
|
||||
class TestSkipped(Error):
    """Test skipped.

    This can be raised to indicate that a test was deliberately
    skipped, but not because a feature wasn't available.  For
    example, if some resource can't be used, such as the network
    appears to be unavailable, this should be raised instead of
    TestFailed.

    """
|
||||
|
||||
|
||||
verbose = 1 # Flag set to 0 by regrtest.py
|
||||
|
||||
def unload(name):
    """Remove the module called name from sys.modules, if it is there."""
    import sys
    loaded = sys.modules
    try:
        del loaded[name]
    except KeyError:
        # Not imported (or already unloaded): nothing to do.
        pass
|
||||
|
||||
def forget(modname):
    """Unload modname and delete its .pyc file from every sys.path entry.

    Directories where the file is missing or cannot be removed are
    skipped silently.
    """
    import os
    import sys
    unload(modname)
    compiled = modname + '.pyc'
    for entry in sys.path:
        target = os.path.join(entry, compiled)
        try:
            os.unlink(target)
        except os.error:
            pass
|
||||
|
||||
FUZZ = 1e-6
|
||||
|
||||
def fcmp(x, y): # fuzzy comparison function
    """Compare x and y like cmp(), tolerating small float differences.

    If either argument is a float, both are coerced to a common type
    and 0 is returned when they differ by at most
    (abs(x) + abs(y)) * FUZZ.  Two tuples or two lists are compared
    element by element with fcmp(), then by length.  Everything else,
    including a failed coercion, falls through to cmp(x, y).
    """
    float_type = type(0.0)
    if type(x) == float_type or type(y) == float_type:
        try:
            x, y = coerce(x, y)
            tolerance = (abs(x) + abs(y)) * FUZZ
            if abs(x-y) <= tolerance:
                return 0
        except:
            # Deliberate best effort: fall back to the plain comparison.
            pass
    elif type(x) == type(y) and type(x) in (type(()), type([])):
        common = min(len(x), len(y))
        for i in range(common):
            outcome = fcmp(x[i], y[i])
            if outcome != 0:
                return outcome
        return cmp(len(x), len(y))
    return cmp(x, y)
|
||||
|
||||
TESTFN = '@test' # Filename used for testing
|
||||
from os import unlink
|
||||
|
||||
def findfile(file, here=__file__):
    """Return the path of an existing file called file.

    An absolute file is returned unchanged.  Otherwise the directory of
    here is searched first and then each sys.path entry; the first
    existing candidate wins.  If none exists, file itself is returned.
    """
    import os
    import sys
    if os.path.isabs(file):
        return file
    search = [os.path.dirname(here)] + sys.path
    for directory in search:
        candidate = os.path.join(directory, file)
        if os.path.exists(candidate):
            return candidate
    return file
|
||||
Loading…
Reference in a new issue