regression test suite

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@229 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2001-02-18 20:29:52 +00:00
parent 83a0846fef
commit 5d1c6ef00e
24 changed files with 609 additions and 306 deletions

View file

@ -6,6 +6,7 @@ dist
foo
MANIFEST
VERSION
LinkCheckerConf.py
linkcheckerConf.py
js
locale
Packages.gz

View file

@ -68,11 +68,7 @@ uploadpull: distclean dist package files VERSION
ssh -C -t shell1.sourceforge.net "cd /home/groups/$(PACKAGE) && make pull"
test:
rm -f test/*.result
@for i in test/*.html; do \
echo "Testing $$i. Results are in $$i.result"; \
./$(PACKAGE) -r1 -ucalvin -pcalvin -otext -N"news.rz.uni-sb.de" -v -a $$i > $$i.result 2>&1; \
done
python2 test/regrtest.py
locale:
$(MAKE) -C po

5
debian/changelog vendored
View file

@ -1,9 +1,10 @@
linkchecker (1.3.0) unstable; urgency=low
* require Python 2.0
* require and use Python >= 2.0
* fix agent matching in robotparser2.py
* added more LinkPatterns (ripped from HTML::Tagset.pm)
-- Bastian Kleineidam <calvin@users.sourceforge.net> Thu, 1 Feb 2001 01:51:27 +0100
-- Bastian Kleineidam <calvin@users.sourceforge.net> Fri, 9 Feb 2001 10:51:24 +0100
linkchecker (1.2.14) unstable; urgency=low

2
debian/control vendored
View file

@ -2,7 +2,7 @@ Source: linkchecker
Section: web
Priority: optional
Maintainer: Bastian Kleineidam <calvin@users.sourceforge.net>
Build-Depends: python2-base (>= 2.0), python2-base (<= 2.0), python2-dev (>= 1.5.2), python2-dev (<= 2.0), debhelper
Build-Depends: python2-base (>= 2.0), python2-base (<= 2.0), python2-dev (>= 1.5.2), python2-dev (<= 2.0), debhelper (>= 3.0.0)
Build-Depends-Indep: gettext
Standards-Version: 3.2.1

5
debian/rules vendored
View file

@ -60,7 +60,6 @@ binary-indep: build install
# Build architecture-dependent files here.
binary-arch: build install
# dh_testversion
dh_testdir
dh_testroot
# dh_installdebconf
@ -71,7 +70,7 @@ binary-arch: build install
# dh_installpam
# dh_installinit
# dh_installcron
dh_installmanpages
dh_installman linkchecker.1
# dh_installinfo
# dh_undocumented linkchecker.1
dh_installchangelogs
@ -79,8 +78,6 @@ binary-arch: build install
dh_strip
dh_compress
dh_fixperms
# You may want to make some executables suid here.
# dh_suidregister
# dh_makeshlibs
dh_installdeb
# dh_perl

View file

@ -54,6 +54,7 @@ Loggers = {
"csv": Logging.CSVLogger,
"blacklist": Logging.BlacklistLogger,
"xml": Logging.XMLLogger,
"test": Logging.TestLogger,
}
# for easy printing: a comma separated logger list
LoggerKeys = reduce(lambda x, y: x+", "+y, Loggers.keys())
@ -102,9 +103,10 @@ class Configuration(UserDict.UserDict):
'joe@')]
self["proxy"] = getproxies()
self["recursionlevel"] = 1
self["robotstxt"] = 0
self["robotstxt"] = 1
self["strict"] = 0
self["fileoutput"] = []
self["loggingfields"] = "all"
# Logger configurations
self["text"] = {
"filename": "linkchecker-out.txt",
@ -152,6 +154,7 @@ class Configuration(UserDict.UserDict):
self['xml'] = {
"filename": "linkchecker-out.xml",
}
self['test'] = {} # no args for test logger
# default values
self['log'] = self.newLogger('text')
self["quiet"] = 0
@ -412,8 +415,9 @@ class Configuration(UserDict.UserDict):
try: self["warnings"] = cfgparser.getboolean(section, "warnings")
except ConfigParser.Error: pass
try:
filelist = string.split(cfgparser.get(section, "fileoutput"))
filelist = string.split(cfgparser.get(section, "fileoutput"), ",")
for arg in filelist:
arg = string.strip(arg)
# no file output for the blacklist Logger
if Loggers.has_key(arg) and arg != "blacklist":
self['fileoutput'].append(
@ -424,6 +428,10 @@ class Configuration(UserDict.UserDict):
for opt in cfgparser.options(key):
try: self[key][opt] = cfgparser.get(key, opt)
except ConfigParser.Error: pass
try:
self['loggingfields'] = map(string.strip, string.split(
cfgparser.get(section, 'loggingfields'), ","))
except ConfigParser.Error: pass
section="checking"
try:

View file

@ -44,22 +44,22 @@ import Config, StringUtil
import linkcheck
_ = linkcheck._
# keywords
KeyWords = ["Real URL",
"Result",
"Base",
"Name",
"Parent URL",
"Info",
"Warning",
"D/L Time",
"Check Time",
"URL",
]
MaxIndent = max(map(lambda x: len(_(x)), KeyWords))+1
LogFields = {
"realurl": "Real URL",
"result": "Result",
"base": "Base",
"name": "Name",
"parenturl": "Parent URL",
"info": "Info",
"warning": "Warning",
"downloadtime": "D/L Time",
"checktime": "Check Time",
"url": "URL",
}
MaxIndent = max(map(lambda x: len(_(x)), LogFields.values()))+1
Spaces = {}
for key in KeyWords:
Spaces[key] = " "*(MaxIndent - len(_(key)))
for key,value in LogFields.items():
Spaces[key] = " "*(MaxIndent - len(_(value)))
EntityTable = {
'<': '&lt;',
@ -99,7 +99,13 @@ class StandardLogger:
self.fd = args['fd']
else:
self.fd = sys.stdout
self.logfields = None # all fields
if args.has_key('logfields'):
if type(args['logfields']) == ListType:
self.logfields = args
def logfield(self, name):
return self.logfields and name in self.logfields
def init(self):
self.starttime = time.time()
@ -111,14 +117,15 @@ class StandardLogger:
def newUrl(self, urlData):
self.fd.write("\n"+_("URL")+Spaces["URL"]+urlData.urlName)
if urlData.cached:
self.fd.write(_(" (cached)\n"))
else:
self.fd.write("\n")
if urlData.name:
self.fd.write(_("Name")+Spaces["Name"]+urlData.name+"\n")
if urlData.parentName:
if self.logfield('url'):
self.fd.write("\n"+_(LogFields['url'])+Spaces['url']+urlData.urlName)
if urlData.cached:
self.fd.write(_(" (cached)\n"))
else:
self.fd.write("\n")
if urlData.name and self.logfield('name'):
self.fd.write(_(LogFields["name"])+Spaces["name"]+urlData.name+"\n")
if urlData.parentName and self.logfield('parentname'):
self.fd.write(_("Parent URL")+Spaces["Parent URL"]+
urlData.parentName+_(", line ")+
str(urlData.line)+"\n")
@ -739,3 +746,33 @@ class CSVLogger(StandardLogger):
self.fd.flush()
self.fd = None
class TestLogger:
""" Output for regression test """
# Emits each checked URL as plain "key value" lines on stdout so the
# regression driver (test/regrtest.py) can diff the run against a stored
# expected-output file in test/output/.
def __init__(self, **args):
# Accepts **args only for interface parity with the other loggers;
# this logger has no configuration of its own.
pass
def init(self):
# Called once before logging starts; nothing to set up here.
pass
def newUrl(self, urlData):
# Print one "key value" line per populated attribute of urlData.
# The "url" line is unconditional and starts each record.
print 'url',urlData.urlName
if urlData.cached:
print "cached"
if urlData.name:
print "name",urlData.name
if urlData.parentName:
# line number is only meaningful when there is a parent document
print "parenturl",urlData.parentName
print "line",urlData.line
if urlData.baseRef:
print "baseurl",urlData.baseRef
if urlData.infoString:
print "info",urlData.infoString
if urlData.warningString:
print "warning",urlData.warningString
if urlData.valid:
print "valid",urlData.validString
else:
print "error",urlData.errorString
def endOfOutput(self, linknumber=-1):
# Called after the last URL; tests want no summary footer, so the
# linknumber argument is ignored.
pass

View file

@ -137,7 +137,7 @@ class RobotFileParser:
return 1
# search for given user agent matches
# the first match counts
url = urllib.quote(urlparse.urlparse(url)[2])
url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
for entry in self.entries:
if entry.applies_to(useragent):
return entry.allowance(url)
@ -222,6 +222,8 @@ def _test():
rp.parse(open(sys.argv[1]).readlines())
# test for re.escape
_check(rp.can_fetch('*', 'http://www.musi-cal.com/'), 1)
# empty url path
_check(rp.can_fetch('*', 'http://www.musi-cal.com'), 1)
# this should match the first rule, which is a disallow
_check(rp.can_fetch('', 'http://www.musi-cal.com/'), 0)
# various cherry pickers

View file

@ -27,102 +27,106 @@ import linkcheck
from linkcheck import _,StringUtil
Usage = _("USAGE\tlinkchecker [options] file-or-url...\n"
"\n"
"OPTIONS\n"
"For single-letter option arguments the space is not a necessity. So\n"
"'-o colored' is the same as '-ocolored'.\n"
"-a, --anchors\n"
" Check anchor references. Default is don't check anchors.\n"
"-d, --denyallow\n"
" Swap checking order to extern/intern. Default checking order\n"
" is intern/extern.\n"
"-D, --debug\n"
" Print additional debugging information.\n"
"-e regex, --extern=regex\n"
" Assume urls that match the given expression as extern.\n"
" Only intern HTML links are checked recursively.\n"
"-f file, --config=file\n"
" Use file as configuration file. LinkChecker first searches\n"
" ~/.linkcheckerrc and then /etc/linkcheckerrc\n"
" (under Windows <path-to-program>\\linkcheckerrc).\n"
"-F type, --file-output=type\n"
" Same as output, but write to a file linkchecker-out.<type>.\n"
" If the file already exists, it is overwritten. You can specify\n"
" this option more than once. There is no file output for the\n"
" blacklist logger. Default is no file output.\n"
"-i regex, --intern=regex\n"
" Assume URLs that match the given expression as intern.\n"
" LinkChecker descends recursively only to intern URLs, not to extern.\n"
"-h, --help\n"
" Help me! Print usage information for this program.\n"
"-N server, --nntp-server=server\n"
" Specify an NNTP server for 'news:...' links. Default is the\n"
" environment variable NNTP_SERVER. If no host is given,\n"
" only the syntax of the link is checked.\n"
"-o type, --output=type\n"
" Specify output type as %s.\n"
" Default type is text.\n"
"-p pwd, --password=pwd\n"
" Try password pwd for HTML and FTP authorization.\n"
" Default password is 'joe@'. See also -u.\n"
"-q, --quiet\n"
" Quiet operation. This is only useful with -F.\n"
"-r depth, --recursion-level=depth\n"
" Check recursively all links up to given depth (depth >= 0).\n"
" Default depth is 1.\n"
"-R, --robots-txt\n"
" Obey the robots exclusion standard.\n"
"-s, --strict\n"
" Check only syntax of extern links, do not try to connect to them.\n"
"-t num, --threads=num\n"
" Generate no more than num threads. Default number of threads is 5.\n"
" To disable threading specify a non-positive number.\n"
"-u name, --user=name\n"
" Try username name for HTML and FTP authorization.\n"
" Default is 'anonymous'. See also -p.\n"
"-V, --version\n"
" Print version and exit.\n"
"-v, --verbose\n"
" Log all checked URLs (implies -w). Default is to log only invalid\n"
" URLs.\n"
"-w, --warnings\n"
" Log warnings.\n"
"-W regex, --warning-regex=regex\n"
" Define a regular expression which prints a warning if it matches\n"
" any content of the checked link.\n"
" This applies of course only to pages which are valid, so we can\n"
" get their content.\n"
" Use this to check for pages that contain some form of error\n"
" message, for example 'This page has moved' or 'Oracle\n"
" Application Server error'.\n"
" This option implies -w.\n") % linkcheck.Config.LoggerKeys
Usage = _("""USAGE\tlinkchecker [options] file-or-url...
Notes = _("NOTES\n"
"o LinkChecker assumes an http:// resp. ftp:// link when a commandline URL\n"
" starts with 'www.' resp. 'ftp.'\n"
" You can also give local files as arguments.\n"
"o If you have your system configured to automatically establish a\n"
" connection to the internet (e.g. with diald), it will connect when\n"
" checking links not pointing to your local host.\n"
" Use the -s and -i options to prevent this.\n"
"o Javascript links are currently ignored.\n"
"o If your platform does not support threading, LinkChecker uses -t0.\n"
"o You can supply multiple user/password pairs in a configuration file.\n"
"o Cookies are not accepted by LinkChecker.\n"
"o To use proxies set $http_proxy, $https_proxy on Unix or Windows.\n"
" On a Mac use the Internet Config.\n"
"o When checking 'news:' links the given NNTP host doesn't need to be the\n"
" same as the host of the user browsing your pages!\n")
OPTIONS
For single-letter option arguments the space is not a necessity. So
'-o colored' is the same as '-ocolored'.
-a, --anchors
Check anchor references. Default is don't check anchors.
-d, --denyallow
Swap checking order to extern/intern. Default checking order
is intern/extern.
-D, --debug
Print additional debugging information.
-e regex, --extern=regex
Assume urls that match the given expression as extern.
Only intern HTML links are checked recursively.
-f file, --config=file
Use file as configuration file. LinkChecker first searches
~/.linkcheckerrc and then /etc/linkcheckerrc
(under Windows <path-to-program>\\linkcheckerrc).
-F type[/filename], --file-output=type[/filename]
Same as output, but write to a file linkchecker-out.<type>
or <filename> if specified. If the file already exists, it
is overwritten. You can specify this option more than once.
There is no file output for the blacklist logger. Default is
no file output.
-i regex, --intern=regex
Assume URLs that match the given expression as intern.
LinkChecker descends recursively only to intern URLs, not to extern.
-h, --help
Help me! Print usage information for this program.
-N server, --nntp-server=server
Specify an NNTP server for 'news:...' links. Default is the
environment variable NNTP_SERVER. If no host is given,
only the syntax of the link is checked.
-o type, --output=type
Specify output type as %s.
Default type is text.
-p pwd, --password=pwd
Try password pwd for HTML and FTP authorization.
Default password is 'joe@'. See also -u.
-q, --quiet
Quiet operation. This is only useful with -F.
-r depth, --recursion-level=depth
Check recursively all links up to given depth (depth >= 0).
Default depth is 1.
-R, --robots-txt
Obey the robots exclusion standard.
-s, --strict
Check only syntax of extern links, do not try to connect to them.
-t num, --threads=num
Generate no more than num threads. Default number of threads is 5.
To disable threading specify a non-positive number.
-u name, --user=name
Try username name for HTML and FTP authorization.
Default is 'anonymous'. See also -p.
-V, --version
Print version and exit.
-v, --verbose
Log all checked URLs (implies -w). Default is to log only invalid
URLs.
-w, --warnings
Log warnings.
-W regex, --warning-regex=regex
Define a regular expression which prints a warning if it matches
any content of the checked link.
This applies of course only to pages which are valid, so we can
get their content.
Use this to check for pages that contain some form of error
message, for example 'This page has moved' or 'Oracle
Application Server error'.
This option implies -w.\n") % linkcheck.Config.LoggerKeys
"""
Examples = _("EXAMPLES\n"
"o linkchecker -v -ohtml -r2 -s -itreasure.calvinsplayground.de \\\n"
" http://treasure.calvinsplayground.de/~calvin/ > sample.html\n"
"o Local files and syntactic sugar on the command line:\n"
" linkchecker c:\\temp\\test.html\n"
" linkchecker ../bla.html\n"
" linkchecker www.myhomepage.de\n"
" linkchecker -r0 ftp.linux.org\n")
Notes = _("""NOTES
o LinkChecker assumes an http:// resp. ftp:// link when a commandline URL
starts with 'www.' resp. 'ftp.'
You can also give local files as arguments.
o If you have your system configured to automatically establish a
connection to the internet (e.g. with diald), it will connect when
checking links not pointing to your local host.
Use the -s and -i options to prevent this.
o Javascript links are currently ignored.
o If your platform does not support threading, LinkChecker uses -t0.
o You can supply multiple user/password pairs in a configuration file.
o Cookies are not accepted by LinkChecker.
o To use proxies set $http_proxy, $https_proxy on Unix or Windows.
On a Mac use the Internet Config.
o When checking 'news:' links the given NNTP host doesn't need to be the
same as the host of the user browsing your pages!
""")
Examples = _("""EXAMPLES
o linkchecker -v -ohtml -r2 -s -itreasure.calvinsplayground.de \\
http://treasure.calvinsplayground.de/~calvin/ > sample.html
o Local files and syntactic sugar on the command line:
linkchecker c:\\temp\\test.html
linkchecker ../bla.html
linkchecker www.myhomepage.de
linkchecker -r0 ftp.linux.org
""")
def printVersion():
print linkcheck.Config.AppInfo
@ -207,9 +211,13 @@ for opt,arg in options:
"'-o, --output'")
elif opt=="-F" or opt=="--file-output":
if linkcheck.Config.Loggers.has_key(arg) and arg != "blacklist":
config['fileoutput'].append(
config.newLogger(arg, {'fileoutput':1}))
ns = {'fileoutput':1}
try:
type, ns['filename'] = string.split(arg, '/', 1)
if not ns['filename']: raise ValueError
except ValueError: type = arg
if linkcheck.Config.Loggers.has_key(type) and type != "blacklist":
config['fileoutput'].append(config.newLogger(type, ns))
else:
printUsage((_("Illegal argument '%s' for option ") % arg) +\
"'-F, --file-output'")

View file

@ -29,84 +29,85 @@ a (Fast)CGI web interface (requires HTTP server)
For single-letter option arguments the space is not a necessity.
So \fI-o colored\fP is the same as \fI-ocolored\fP.
.TP
\fB-a, --anchors\fP
\fB-a\fP, \fB--anchors\fP
Check anchor references. Default is don't check anchors.
.TP
\fB-d, --denyallow\fP
\fB-d\fP, \fB--denyallow\fP
Swap checking order to extern/intern. Default checking order is
intern/extern.
.TP
\fB-D, --debug\fP
\fB-D\fP, \fB--debug\fP
Print debugging information.
.TP
\fB-e \fIregex\fB, --extern=\fIregex\fP
\fB-e \fIregex\fP, \fB--extern=\fIregex\fP
Assume urls that match the given regular expression as extern.
Only intern HTML links are checked recursively.
.TP
\fB-f \fIfile\fB, --config=\fIfile\fP
\fB-f \fIfile\fP, \fB--config=\fIfile\fP
Use \fIfile\fP as configuration file. LinkChecker first searches for
~/.linkcheckerrc and then /etc/linkcheckerrc on Unix systems.
Under Windows systems we read <path-to-program>\\linkcheckerrc.
.TP
\fB-F \fItype\fB, --file-output=\fItype\fP
Same as output, but write to a file \fIlinkchecker-out.<type>\fP.
If the file already exists, it is overwritten. You can specify this
option more than once. There is no file output for the blacklist
logger. Default is no file output.
\fB-F \fItype\fP[\fI/filename\fP], \fB--file-output=\fItype\fP[\fI/filename\fP]
Same as output, but write to a file \fIlinkchecker-out.<type>\fP
or \fIfilename\fP if specified. If the file already exists, it is
overwritten. You can specify this option more than once. There
is no file output for the blacklist logger. Default is no file
output.
.TP
\fB-i \fIregex\fB, --intern=\fIregex\fP
\fB-i \fIregex\fP, \fB--intern=\fIregex\fP
Assume URLs that match the given regular expression as intern.
LinkChecker descends recursively only to intern URLs, not to extern.
.TP
\fB-h, --help\fP
\fB-h\fP, \fB--help\fP
Help me! Print usage information for this program.
.TP
\fB-N \fIserver\fB, --nntp-server=\fIserver\fP
\fB-N \fIserver\fP, \fB--nntp-server=\fIserver\fP
Specify an NNTP server for 'news:...' links. Default is the
environment variable NNTP_SERVER. If no host is given,
only the syntax of the link is checked.
.TP
\fB-o \fItype\fB, --output=\fItype\fP
\fB-o \fItype\fP, \fB--output=\fItype\fP
Specify output type as \fItext\fP, \fIcolored\fP, \fIhtml\fP, \fIsql\fP,
\fIcsv\fP, \fIgml\fP, \fIxml\fP or \fIblacklist\fP.
Default type is \fItext\fP.
.TP
\fB-p \fIpwd\fB, --password=\fIpwd\fP
\fB-p \fIpwd\fP, \fB--password=\fIpwd\fP
Try the password \fIpwd\fP for HTML and FTP authorization.
The default password is \fIguest@\fP. See also \fB-u\fP.
.TP
\fB-q, --quiet\fP
\fB-q\fP, \fB--quiet\fP
Quiet operation. This is only useful with \fB-F\fP.
.TP
\fB-r \fIdepth\fB, --recursion-level=\fIdepth\fP
\fB-r \fIdepth\fP, \fB--recursion-level=\fIdepth\fP
Check recursively all links up to given \fIdepth\fP (depth >= 0).
Default depth is 1.
.TP
\fB-R, --robots-txt\fP
\fB-R\fP, \fB--robots-txt\fP
Obey the robots exclusion standard.
.TP
\fB-s, --strict\fP
\fB-s\fP, \fB--strict\fP
Check only the syntax of extern links, do not try to connect to them.
.TP
\fB-t \fInum\fB, --threads=\fInum\fP
\fB-t \fInum\fP, \fB--threads=\fInum\fP
Generate no more than \fInum\fP threads. Default number of threads is 5.
To disable threading specify a non-positive number.
.TP
\fB-u \fIname\fB, --user=\fIname\fP
\fB-u \fIname\fP, \fB--user=\fIname\fP
Try username \fIname\fP for HTML and FTP authorization.
Default is \fIanonymous\fP. See also \fB-p\fP.
.TP
\fB-V, --version\fP
\fB-V\fP, \fB--version\fP
Print version and exit.
.TP
\fB-v, --verbose\fP
\fB-v\fP, \fB--verbose\fP
Log all checked URLs (implies \fB-w\fP). Default is to log only invalid
URLs.
.TP
\fB-w, --warnings\fP
\fB-w\fP, \fB--warnings\fP
Log warnings.
.TP
\fB-W \fIregex\fB, --warning-regex=\fIregex\fP
\fB-W \fIregex\fP, \fB--warning-regex=\fIregex\fP
Define a regular expression which prints a warning if it matches any
content of the checked link.
This applies of course only to pages which are valid, so we can get

View file

@ -1,91 +1,98 @@
# sample resource file
# to modify, just uncomment the line
# sample resource file with default values
# see linkchecker -h for help on these options
# commandline options override these settings!
[output]
# turn on/off debug messages
#debug=0
debug=0
# use the color logger
#log=colored
log=text
# turn on/off --verbose
#verbose=0
verbose=0
# turn on/off --warnings
#warnings=0
warnings=0
# turn on/off --quiet
#quiet=0
quiet=0
# additional file output
#fileoutput = text colored html gml sql
fileoutput=
#fileoutput = text, colored, html, gml, sql XXX
# what fields should each logger print out?
fields = all
# field = url, parent url, base url
# fields names: XXX
# url
# parent url
# base url
# each Logger can have separate configuration parameters
# standard text logger
[text]
#filename=linkchecker-out.txt
filename=linkchecker-out.txt
# GML logger
[gml]
#filename=linkchecker-out.gml
filename=linkchecker-out.gml
# CSV logger
[csv]
#filename=linkchecker-out.csv
#separator=;
filename=linkchecker-out.csv
separator=;
# SQL logger
[sql]
#filename=linkchecker-out.sql
#dbname=linksdb
#commandsep=;
filename=linkchecker-out.sql
dbname=linksdb
commandsep=;
# HTML logger
[html]
#filename=linkchecker-out.html
filename=linkchecker-out.html
# colors for the various parts
#colorbackground="#ffffff"
#colorurl=blue
#colorborder=
#colorlink=
#tablewarning=
#tableok=
#tableerror=
colorbackground="#fff7e5"
colorurl="#dcd5cf"
colorborder="#000000"
colorlink="#191c83"
tablewarning=<td bgcolor="#e0954e">
tableerror=<td bgcolor="#db4930">
tableok=<td bgcolor="#3ba557">
# ANSI color logger
[colored]
#filename=linkchecker-out.ansi
# colors for the various parts
#colorparent=
#colorurl=
#colorname=
#colorreal=
#colorbase=
#colorvalid=
#colorinvalid=
#colorinfo=
#colorwarning=
#colordltime=
#colorreset=
filename=linkchecker-out.ansi
# colors for the various parts (\x1b = ESC)
colorparent="\x1b[37m"
colorurl="\x1b[0m"
colorname="\x1b[0m"
colorreal="\x1b[36m"
colorbase="\x1b[35m"
colorvalid="\x1b[1;32m"
colorinvalid="\x1b[1;31m"
colorinfo="\x1b[0m"
colorwarning="\x1b[1;33m"
colordltime="\x1b[0m"
colorreset="\x1b[0m"
# blacklist logger
[blacklist]
#filename=~/.blacklist
filename=~/.blacklist
# checking options
[checking]
# number of threads
#threads=5
threads=5
# check anchors?
#anchors=0
#recursionlevel=1
anchors=0
recursionlevel=1
# obey robots.txt exclusion?
#robotstxt=0
robotstxt=1
# overall strict checking. You can specify for each extern URL
# separately if its strict or not. See the [filtering] section
#strict=0
strict=0
# supply a regular expression for which warnings are printed if found
# in any HTML files.
#warningregex="Request failed"
warningregex=
# Basic NNTP server. Overrides NNTP_SERVER environment variable.
#nntpserver=news.uni-stuttgart.de
nntpserver=
# filtering options (see FAQ)
# for each extern link we can specify if it is strict or not
@ -98,7 +105,7 @@
# internlinks=calvinsplayground\.de
# check only syntax of all mail addresses
# extern3=^mailto: 1
#denyallow=0
denyallow=0
# You can provide different user/password pairs for different link types.
# Entries are a triple (link regular expression, username, password),

View file

@ -1,3 +1,2 @@
*.result
*.prof
*.pyc

1
test/__init__.py Normal file
View file

@ -0,0 +1 @@
# Dummy file to make this directory a package.

View file

@ -1,8 +0,0 @@
<!-- base without href -->
<base target="_top">
<!-- meta url -->
<META HTTP-equiv="refresh" content="0; url=http://www.calvinandhobbes.com/">
<!-- spaces between key and value -->
<a href
=
"file:/etc">

View file

@ -1,5 +0,0 @@
<!-- base with href -->
<base href="file:/etc/">
<!-- good file -->
<a href="passwd">

View file

@ -1,5 +0,0 @@
<!-- frame src urls -->
<frameset border="0" frameborder="0" framespacing="0">
<frame name="top" src="test1.html" frameborder="0">
<frame name="bottom" src="test2.html" frameborder="0">
</frameset>

View file

@ -1,20 +0,0 @@
<!-- extra mail checking -->
<html><head></head>
<body>
<!-- legal -->
<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>
<a href="mailto:Dude <calvin@studcs.uni-sb.de> , Killer <calvin@cs.uni-sb.de>?subject=bla">2</a>
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>?bcc=jsmith%40company.com">3</a>
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>">4</a>
<a href="mailto:">6</a>
<a href="mailto:o'hara@cs.uni-sb.de">5</a>
<a href="mailto:?to=calvin@studcs.uni-sb.de?subject=blubb">...</a>
<a href="mailto:jan@jan-dittberner.de?subject=test">...</a>
<!-- illegal -->
<!-- contains non-quoted characters -->
<a href="mailto:a@d?subject=äöü">5</a>
<a href="mailto:calvin@cs.uni-sb.de?subject=Halli hallo">_</a>
<!-- ? extension forbidden in <> construct -->
<a href="mailto:Bastian Kleineidam <calvin@host1?foo=bar>">3</a>
</body>
</html>

View file

@ -1,19 +0,0 @@
<!-- news testing -->
<a href="news:comp.os.linux.misc">
<!-- snews -->
<a href="snews:de.comp.os.unix.linux.misc">
<!-- no group -->
<a href="news:">
<!-- illegal syntax -->
<a href="news:§$%&/´`(§%">
<!-- nttp scheme with host -->
<a href="nntp://news.rz.uni-sb.de/comp.lang.python">
<!-- article span -->
<a href="nntp://news.rz.uni-sb.de/comp.lang.python/1-5">
<!-- article number -->
<a href="nntp://news.rz.uni-sb.de/EFGJG4.7A@deshaw.com">
<!-- host but no group -->
<a href="nntp://news.rz.uni-sb.de/">
<!-- article span -->
<a href="news:comp.lang.python/1-5">

10
test/output/test_base Normal file
View file

@ -0,0 +1,10 @@
test_base
url file:///home/calvin/projects/linkchecker/test/output/base1.html
realurl file:/home/calvin/projects/linkchecker/test/output/base1.html
error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base1.html'
url file:///home/calvin/projects/linkchecker/test/output/base2.html
realurl file:/home/calvin/projects/linkchecker/test/output/base2.html
error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base2.html'
url file:///home/calvin/projects/linkchecker/test/output/base3.html
realurl file:/home/calvin/projects/linkchecker/test/output/base3.html
error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base3.html'

248
test/regrtest.py Executable file
View file

@ -0,0 +1,248 @@
#!/usr/bin/env python2
# this file is _not_ the original Python2 regression test suite.
"""Bastis Regression test.
This will find all modules whose name is "test_*" in the test
directory, and run them. Various command line options provide
additional facilities.
Command line options:
-v, --verbose
run tests in verbose mode with output to stdout
-q, --quiet
don't print anything except if a test fails
-g, --generate
write the output file for a test instead of comparing it
-x, --exclude
arguments are tests to *exclude*
-r, --random
randomize test execution order
If non-option arguments are present, they are names for tests to run,
unless -x is given, in which case they are names for tests not to run.
If no test names are given, all tests are run.
-v is incompatible with -g and does not compare test output files.
"""
import sys,getopt,os,string
import test_support
def main(tests=None, testdir=None, verbose=0, quiet=0, generate=0,
exclude=0, randomize=0):
"""Execute a test suite.
This also parses command-line options and modifies its behavior
accordingly.
tests -- a list of strings containing test names (optional)
testdir -- the directory in which to look for tests (optional)
Users other than the Python test suite will certainly want to
specify testdir; if it's omitted, the directory containing the
Python test suite is searched for.
If the tests argument is omitted, the tests listed on the
command-line will be used. If that's empty, too, then all *.py
files beginning with test_ will be used.
The other seven default arguments (verbose, quiet, generate, exclude,
single, randomize, and findleaks) allow programmers calling main()
directly to set the values that would normally be set by flags on the
command line.
"""
# Parse the command line; any getopt failure reports usage and aborts.
try:
opts, args = getopt.getopt(sys.argv[1:],
'vgqxsrl',
['verbose',
'generate',
'quiet',
'exclude',
'random',])
except getopt.error, msg:
# NOTE(review): error() and usage() are not defined anywhere in this
# module -- confirm they exist, otherwise this handler raises NameError.
error(msg)
usage()
return -1
# Translate flags into the keyword-argument defaults.
for opt, val in opts:
if opt in ('-v','--verbose'): verbose = verbose + 1
if opt in ('-q','--quiet'): quiet = 1; verbose = 0
if opt in ('-g','--generate'): generate = 1
if opt in ('-x','--exclude'): exclude = 1
if opt in ('-r','--random'): randomize = 1
# -g writes expected output, -v streams to stdout: mutually exclusive.
if generate and verbose:
print "-g and -v don't go together!"
return 2
good = []
bad = []
skipped = []
for i in range(len(args)):
# Strip trailing ".py" from arguments
if args[i][-3:] == '.py':
args[i] = args[i][:-3]
# Work on copies so the module-level lists are never mutated.
stdtests = STDTESTS[:]
nottests = NOTTESTS[:]
if exclude:
# With -x the positional args become the exclusion list instead.
for arg in args:
if arg in stdtests:
stdtests.remove(arg)
nottests[:0] = args
args = []
tests = tests or args or findtests(testdir, stdtests, nottests)
if randomize:
# NOTE(review): `random` is not imported at module top (only
# sys, getopt, os, string, test_support) -- -r would raise NameError.
random.shuffle(tests)
test_support.verbose = verbose # Tell tests to be moderately quiet
# Remember the modules loaded before the run so new ones can be unloaded.
save_modules = sys.modules.keys()
for test in tests:
if not quiet:
print test
# runtest returns >0 ok, 0 failed, <0 skipped.
ok = runtest(test, generate, verbose, quiet, testdir)
if ok > 0:
good.append(test)
elif ok == 0:
bad.append(test)
else:
skipped.append(test)
# Unload the newly imported modules (best effort finalization)
for module in sys.modules.keys():
if module not in save_modules and module.startswith("test."):
test_support.unload(module)
# Summary: counts of passed/failed/skipped tests.
if good and not quiet:
if not bad and not skipped and len(good) > 1:
print "All",
print count(len(good), "test"), "OK."
if bad:
print count(len(bad), "test"), "failed:",
print string.join(bad)
if skipped and not quiet:
print count(len(skipped), "test"), "skipped:",
print string.join(skipped)
# Exit status: truthy (1) when anything failed, 0 otherwise.
return len(bad) > 0
# Tests that always run first, in this fixed order.
STDTESTS = [
    'test_base',
    # 'test_frames',
]

# Modules matching "test_*" that are support code, not tests.
NOTTESTS = [
    'test_support',
]

def findtests(testdir=None, stdtests=STDTESTS, nottests=NOTTESTS):
    """Return a list of all applicable test modules.

    testdir  -- directory to scan; defaults to the directory containing
                this script (see findtestdir())
    stdtests -- module names that always come first, in the given order
    nottests -- module names to exclude from the scan

    The result is stdtests followed by every other module whose file
    name matches "test_*.py", sorted alphabetically.
    """
    if not testdir:
        testdir = findtestdir()
    tests = []
    for name in os.listdir(testdir):
        # startswith/endswith instead of slice comparisons: same behavior,
        # clearer intent, no magic index constants.
        if name.startswith("test_") and name.endswith(".py"):
            modname = name[:-3]
            if modname not in stdtests and modname not in nottests:
                tests.append(modname)
    tests.sort()
    return stdtests + tests
def runtest(test, generate, verbose, quiet, testdir = None):
"""Run a single test.
test -- the name of the test
generate -- if true, generate output, instead of running the test
and comparing it to a previously created output file
verbose -- if true, print more messages
quiet -- if true, don't print 'skipped' messages (probably redundant)
testdir -- test directory
Returns 1 on success, 0 on failure/crash, -1 when the test was skipped.
"""
test_support.unload(test)
if not testdir: testdir = findtestdir()
# Expected-output files live in <testdir>/output/<testname>.
outputdir = os.path.join(testdir, "output")
outputfile = os.path.join(outputdir, test)
# Choose where the test's stdout goes: a fresh file (-g), the real
# stdout (-v), or a Compare object that diffs against the stored file.
try:
if generate:
cfp = open(outputfile, "w")
elif verbose:
cfp = sys.stdout
else:
cfp = Compare(outputfile)
except IOError:
# Missing/unopenable output file: run the test without comparison.
cfp = None
print "Warning: can't open", outputfile
try:
save_stdout = sys.stdout
try:
if cfp:
sys.stdout = cfp
print test # Output file starts with test name
# Importing the module IS running the test.
__import__(test, globals(), locals(), [])
if cfp and not (generate or verbose):
cfp.close()
finally:
# Always restore stdout, even when the test raises.
sys.stdout = save_stdout
except (ImportError, test_support.TestSkipped), msg:
if not quiet:
print "test", test,
print "skipped -- ", msg
return -1
except KeyboardInterrupt:
# Let Ctrl-C abort the whole run, not just this test.
raise
except test_support.TestFailed, msg:
print "test", test, "failed --", msg
return 0
except:
# Any other exception counts as a crash.
# NOTE(review): `type` shadows the builtin here, and `traceback` is
# not imported at module top -- the -v branch below would raise
# NameError; confirm before relying on it.
type, value = sys.exc_info()[:2]
print "test", test, "crashed --", str(type) + ":", value
if verbose:
traceback.print_exc(file=sys.stdout)
return 0
else:
return 1
def findtestdir():
    """Return the directory containing this test script.

    Uses sys.argv[0] when run as a script, __file__ when imported.
    Falls back to the current directory when the path has no
    directory component.
    """
    if __name__ == '__main__':
        this_file = sys.argv[0]
    else:
        this_file = __file__
    # Renamed from `file` to avoid shadowing the builtin.
    # dirname() is "" for a bare filename; substitute os.curdir then.
    return os.path.dirname(this_file) or os.curdir
def count(n, word):
    """Return "<n> <word>", appending a plural "s" unless n is 1."""
    if n == 1:
        suffix = ""
    else:
        suffix = "s"
    return "%d %s%s" % (n, word, suffix)
class Compare:
# File-like object that, instead of writing, reads the expected output
# file in lockstep and raises TestFailed on the first mismatch. Installed
# as sys.stdout by runtest() so a test's print output is verified as it
# is produced.
def __init__(self, filename):
# filename -- path of the stored expected-output file (opened read-only)
self.fp = open(filename, 'r')
def write(self, data):
# Compare `data` against the next len(data) bytes of the expected file.
expected = self.fp.read(len(data))
if data <> expected:
raise test_support.TestFailed, \
'Writing: '+`data`+', expected: '+`expected`
def writelines(self, listoflines):
# Delegate each line to write() so comparison stays in order.
map(self.write, listoflines)
def flush(self):
# No buffering, nothing to flush.
pass
def close(self):
# The test must have consumed the whole expected file; any remainder
# means it produced too little output.
leftover = self.fp.read()
if leftover:
raise test_support.TestFailed, 'Unread: '+`leftover`
self.fp.close()
def isatty(self):
# Pretend to be a plain file so tests don't switch to interactive mode.
return 0
if __name__ == '__main__':
    # Propagate the suite result to the shell.  NOTE(review): main() is
    # defined elsewhere in this file and presumably returns a shell-style
    # exit status -- confirm against its definition.
    sys.exit(main())

View file

@ -1,23 +0,0 @@
Just some HTTP links
<a href="http://www.garantiertnixgutt.bla">bad url</a>
<a href="http://www.heise.de">ok</a>
<a href="http:/www.heise.de">one slash</a>
<a href="http:www.heise.de">no slash</a>
<a href="http://">no url</a>
<a href="http:/">no url, one slash</a>
<a href="http:">no url, no slash</a>
<a href="http://www.blubb.de/stalter&sohn">unquoted ampersand</a>
<a name="iswas">anchor for test2.html</a>
<a href=http://slashdot.org/>unquoted</a>
<a href="http://treasure.calvinsplayground.de/~calvin/software/#isnix"
>invalid anchor</a>
<a href="http://treasure.calvinsplayground.de/~calvin/isnich/"
>authorization (user=calvin, pass=calvin)</a>
<a href="https://www.heise.de">https</a>
<a href="HtTP://WWW.hEIsE.DE">should be cached</a>
<a href="HTTP://WWW.HEISE.DE">should be cached</a>
<!-- <a href=http://nocheckin> no check because of comment -->
<a href=illegalquote1">no beginning quote</a>
<a href="illegalquote2>no ending quote</a>
<!-- check the parser at end of file -->
<a href="g

View file

@ -1,19 +0,0 @@
<!-- meta url -->
<meta http-equiv="refresh" content="5; url=http://localhost">
<a href="hutzli:nixgutt"> <!-- bad scheme -->
<a href="javascript:loadthis()"> <!-- javascript (ignore) -->
<a href="file:///etc/group"> <!-- good file -->
<a href="file://etc/group"> <!-- bad file -->
<a href="file:/etc/group"> <!-- good file -->
<a href="file:etc/group"> <!-- bad file -->
<a href="file:/etc/"> <!-- good dir -->
<a href="test1.html"> <!-- relative url -->
<a href="test1.html#isnix"> <!-- bad anchor -->
<a href="test1.html#iswas"> <!-- good anchor -->
<a href="telnet:localhost"> <!-- telnet to localhost -->
<a href="telnet:"> <!-- telnet without host -->
<a href="ftp:/treasure.calvinsplayground.de/pub"> <!-- ftp one slash -->
<a href="ftp://treasure.calvinsplayground.de/pub"> <!-- ftp two slashes -->
<a href="ftp://treasure.calvinsplayground.de//pub"> <!-- ftp two dir slashes -->
<a href="ftp://treasure.calvinsplayground.de////////pub"> <!-- ftp many dir slashes -->
<a href="ftp:///treasure.calvinsplayground.de/pub"> <!-- ftp three slashes -->

14
test/test_base.py Normal file
View file

@ -0,0 +1,14 @@
# Regression-test driver: check the local base*.html fixtures (which
# presumably exercise <base> tag handling -- confirm against the fixtures)
# with recursion depth 1 and anchor checking enabled.
import os,sys
# Make the in-tree linkcheck package importable when the suite is started
# from the project root (as `make test` -> `python2 test/regrtest.py` does).
sys.path.append(os.getcwd())
import linkcheck
config = linkcheck.Config.Configuration()
config['recursionlevel'] = 1  # follow links one level deep
config['log'] = config.newLogger('test')  # logger type 'test' -- presumably plain output for comparison; confirm in Config
config["anchors"] = 1  # NOTE(review): looks like this also verifies #fragment anchors -- confirm
config["verbose"] = 1
config.disableThreading()  # presumably for deterministic output order -- confirm
htmldir = "test/html"
for file in ('base1.html','base2.html','base3.html'):
    url = os.path.join(htmldir, file)
    config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0))
# Run the check over all queued URLs with the configuration above.
linkcheck.checkUrls(config)

72
test/test_support.py Normal file
View file

@ -0,0 +1,72 @@
"""Supporting definitions for the Python regression test."""
# Root of the test-support exception hierarchy; TestFailed and
# TestSkipped below derive from it.
class Error(Exception):
    """Base class for regression test exceptions."""
class TestFailed(Error):
    """Test failed.

    Raised to signal that a test ran but produced a wrong result,
    as opposed to being skipped (see TestSkipped).
    """
class TestSkipped(Error):
    """Test skipped.

    This can be raised to indicate that a test was deliberately
    skipped, but not because a feature wasn't available. For
    example, if some resource can't be used, such as the network
    appears to be unavailable, this should be raised instead of
    TestFailed.
    """
verbose = 1 # Flag set to 0 by regrtest.py
def unload(name):
    """Remove module *name* from sys.modules so that a subsequent
    import re-executes the module from scratch."""
    import sys
    try:
        del sys.modules[name]
    except KeyError:
        # Module was never imported (or already unloaded) -- nothing to do.
        pass
def forget(modname):
    """Fully forget module *modname*: unload it from sys.modules and
    delete its compiled .pyc file from every directory on sys.path, so
    a later import recompiles the module from source."""
    unload(modname)
    import sys, os
    compiled = modname + '.pyc'  # same basename in every directory
    for d in sys.path:
        try:
            os.unlink(os.path.join(d, compiled))
        except os.error:
            # No .pyc there (or not removable); keep scanning the path.
            pass
FUZZ = 1e-6
def fcmp(x, y): # fuzzy comparison function
    """Compare x and y like cmp(), but treat floats as equal when they
    differ by less than a relative FUZZ, and compare same-type
    tuples/lists element-wise (recursively), then by length.

    NOTE: relies on the Python 2 builtins coerce() and cmp().
    """
    # Float path: coerce to a common numeric type and compare within a
    # fuzz proportional to the magnitudes involved.
    if type(x) == type(0.0) or type(y) == type(0.0):
        try:
            x, y = coerce(x, y)
            fuzz = (abs(x) + abs(y)) * FUZZ
            if abs(x-y) <= fuzz:
                return 0
        except:
            # coerce() can fail for incompatible types; fall through
            # to the plain cmp() below.
            pass
    elif type(x) == type(y) and type(x) in (type(()), type([])):
        # Same-type sequences: the first differing element decides...
        for i in range(min(len(x), len(y))):
            outcome = fcmp(x[i], y[i])
            if outcome <> 0:
                return outcome
        # ...otherwise the shorter sequence compares as smaller.
        return cmp(len(x), len(y))
    return cmp(x, y)
TESTFN = '@test' # Filename used for testing
# NOTE(review): `unlink` is not used in this module; presumably it is
# re-exported so tests can `from test_support import unlink` to clean up
# TESTFN -- confirm against callers.
from os import unlink
def findfile(file, here=__file__):
    """Locate *file* and return a usable path to it.

    Absolute paths are returned untouched.  A relative name is looked
    up first in the directory containing *here* (this module by
    default), then along sys.path; if nothing exists, the name is
    returned unchanged.
    """
    import os
    import sys
    if os.path.isabs(file):
        return file
    search_dirs = [os.path.dirname(here)] + sys.path
    for directory in search_dirs:
        candidate = os.path.join(directory, file)
        if os.path.exists(candidate):
            return candidate
    return file