updated tests

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@324 e7d03fd6-7b0d-0410-9947-9c21f3af8025
2026-05-26 06:53:44 +00:00 · 2001-11-29 13:49:52 +00:00 · 2001-11-29 13:49:52 +00:00 · 14c9cbc4c4
commit 14c9cbc4c4
parent 63ec8723e2
12 changed files with 205 additions and 566 deletions
--- a/48
+++ b/48
@ -13,17 +13,49 @@ Python!

 Setup
 -----
-Run "python setup.py build" to build.
-Run "python setup.py install" to install.
-Run "python setup.py --help" for help.
-
 After installation, the system wide configuration file is in
 <prefix>/share/linkchecker/linkcheckerrc.
-The local configuration file is ~/.linkcheckerrc on Unix platforms.

-To run the program type "linkchecker" followed by your URLs you want
-to check.
-Type "linkchecker -h" for help.
+o Unix platforms
+  - open a commandline window and change to the linkchecker-x.x.x
+    directory
+  - run "python setup.py install" to install
+
+  For help on setup.py options, run "python setup.py --help".
+  The local configuration file is $HOME/.linkcheckerrc
+
+  To run the program, type "linkchecker" followed by the URLs you want
+  to check.
+  Type "linkchecker -h" for help.
+
+
+o Windows platforms
+  - make sure that python.exe is in your PATH
+  - open a commandline window (cmd.exe) and change to the
+    linkchecker-x.x.x directory
+  - run "python.exe setup.py install" to install
+
+  For help on setup.py options, run "python.exe setup.py --help".
+
+  To run the program, change to the scripts directory of your Python
+  installation and type "linkchecker.bat" followed by the URLs you
+  want to check.
+  Type "linkchecker.bat -h" for help.
+
+
+o MacOS 9.x platforms
+  - open the Python IDE
+  - open the setup.py file
+  - run it (click on "Run all")
+  - in the popup window, select the "install" command and click "Add"
+  - click "Ok"; this will copy files into the Python folder
+
+  Read the MacOS Python documentation to find out about passing
+  commandline options to Python scripts.
+
+
+o MacOS X platforms
+  - not tested


 (Fast)CGI web interface
--- a/debian/changelog
+++ b/debian/changelog
@ -1,3 +1,13 @@
+linkchecker (1.3.11) unstable; urgency=low
+
+  * setup.py: use os.getcwd(), not "." which breaks on MacOS 9.x
+  * added platform-specific install instructions
+  * use Python's internal gettext module, get rid of fintl.py
+  * use Python's internal robots.txt parser, get rid of
+    robotparser2.py
+
+ -- Bastian Kleineidam <calvin@debian.org>  Wed, 28 Nov 2001 17:57:20 +0100
+
 linkchecker (1.3.10) unstable; urgency=low

  * use Pythons builtin HTTPS support
--- a/linkcheck/HttpUrlData.py
+++ b/linkcheck/HttpUrlData.py
@ -16,7 +16,7 @@
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 import httplib, urlparse, sys, time, re
-import Config, StringUtil, robotparser2
+import Config, StringUtil, robotparser
 from UrlData import UrlData
 from urllib import splittype, splithost, splituser, splitpasswd
 from linkcheck import _
@ -251,7 +251,7 @@ class HttpUrlData(UrlData):
    def robotsTxtAllowsUrl(self, config):
        roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
        if not config.robotsTxtCache_has_key(roboturl):
-            rp = robotparser2.RobotFileParser(roboturl)
+            rp = robotparser.RobotFileParser(roboturl)
            rp.read()
            config.robotsTxtCache_set(roboturl, rp)
        rp = config.robotsTxtCache_get(roboturl)
--- a/linkcheck/init.py
+++ b/linkcheck/init.py
@ -19,28 +19,15 @@ class error(Exception):
    pass

 # i18n suppport
-LANG="EN" # default language (used for HTML output)
 import _linkchecker_configdata
 try:
-    import fintl,os,string
-    gettext = fintl.gettext
+    import gettext, os  # os is still needed for os.path.join below
    domain = 'linkcheck'
    localedir = os.path.join(_linkchecker_configdata.install_data, 'locale')
-    fintl.bindtextdomain(domain, localedir)
-    fintl.textdomain(domain)
-    languages = []
-    for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
-        if os.environ.has_key(envvar):
-            languages = string.split(os.environ[envvar], ':')
-            break
-    if languages:
-        LANG=string.upper(languages[0])
-
-except ImportError:
-    def gettext(msg):
-        return msg
-# set _ as an alias for gettext
-_ = gettext
+    t = gettext.translation(domain, localedir)
+    _ = t.gettext
+except IOError:
+    _ = lambda s: s

 #import timeoutsocket
 #timeoutsocket.setDefaultSocketTimeout(20)
--- a/linkcheck/fintl.py
+++ b/linkcheck/fintl.py
@ -1,208 +0,0 @@
-## vim:ts=4:et:nowrap
-"""i18n (multiple language) support.  Reads .mo files from GNU gettext msgfmt
-
-If you want to prepare your Python programs for i18n you could simply
-add the following lines to the top of a BASIC_MAIN module of your py-program:
-    try:
-        import fintl
-        gettext = fintl.gettext
-        fintl.bindtextdomain(YOUR_PROGRAM, YOUR_LOCALEDIR)
-        fintl.textdomain(YOUR_PROGRAM)
-    except ImportError:
-        def gettext(msg):
-            return msg
-    _ = gettext
-and/or also add the following to the top of any module containing messages:
-    import BASIC_MAIN
-    _ = BASIC_MAIN.gettext
-            
-Now you could use _("....") everywhere instead of "...." for message texts.
-
-Once you have written your internationalized program, you can use
-the suite of utility programs contained in the GNU gettext package to aid
-the translation into other languages.  
-
-You ARE NOT REQUIRED to release the sourcecode of your program, since 
-linking of your program against GPL code is avoided by this module.  
-Although it is possible to use the GNU gettext library by using the 
-*intl.so* module written by Martin von Löwis if this is available.  But it is
-not required to use it in the  first place.
-"""
-# Copyright 1999 by <mailto: pf@artcom-gmbh.de> (Peter Funk)
-#  
-#                         All Rights Reserved
-#
-# Permission to use, copy, modify, and distribute this software and its
-# documentation for any purpose and without fee is hereby granted,
-# provided that the above copyright notice appear in all copies.
-
-# ArtCom GmbH AND Peter Funk DISCLAIMS ALL WARRANTIES WITH REGARD TO
-# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
-# AND FITNESS, IN NO EVENT SHALL ArtCom GmBH or Peter Funk BE LIABLE
-# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-# OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-_default_localedir = '/usr/share/locale'
-_default_domain = 'python'
-
-# check out, if Martin v. Löwis 'intl' module interface to the GNU gettext
-# library is available and use it only, if it is available: 
-try:
-    from intl import *
-except ImportError:
-    # now do what the gettext library provides in pure Python:
-    error = 'fintl.error'
-    # some globals preserving state:
-    _languages = []
-    _default_mo = None # This is default message outfile used by 'gettext'
-    _loaded_mos = {}   # This is a dictionary of loaded message output files
-
-    # some small little helper routines:
-    def _check_env():
-        """examine language enviroment variables and return list of languages"""
-        # TODO: This should somehow try to find out locale information on
-        #       Non-unix platforms like WinXX and MacOS.  Suggestions welcome!
-        languages = []
-        import os, string
-        for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
-            if os.environ.has_key(envvar):
-                languages = string.split(os.environ[envvar], ':')
-                break
-        # use locale 'C' as default fallback:
-        if 'C' not in _languages:
-            languages.append('C')
-        return languages
-
-    # Utility function used to decode binary .mo file header and seek tables:
-    def _decode_Word(bin):
-        # This assumes little endian (intel, vax) byte order.
-        return  ord(bin[0])        + (ord(bin[1]) <<  8) + \
-               (ord(bin[2]) << 16) + (ord(bin[3]) << 24)
-
-    # Now the methods designed to be used from outside:
-
-    def gettext(message):
-        """return localized version of a 'message' string"""
-        if _default_mo is None: 
-            textdomain()
-        return _default_mo.gettext(message)
-
-    _ = gettext
-
-    def dgettext(domain, message):
-        """like gettext but looks up 'message' in a special 'domain'"""
-        # This may useful for larger software systems
-        if not _loaded_mos.has_key(domain):
-            raise error, "No '" + domain + "' message domain"
-        return _loaded_mos[domain].gettext(message)
-
-    class _MoDict:
-        """read a .mo file into a python dictionary"""
-        __MO_MAGIC = 0x950412de # Magic number of .mo files
-        def __init__(self, domain=_default_domain, localedir=_default_localedir):
-            global _languages
-            self.catalog = {}
-            self.domain = domain
-            self.localedir = localedir
-            # delayed access to environment variables:
-            if not _languages:
-                _languages = _check_env()
-            for self.lang in _languages:
-                if self.lang == 'C':
-                    return
-                mo_filename = "%s//%s/LC_MESSAGES/%s.mo" % (
-                                                  localedir, self.lang, domain)
-                try:
-                     buffer = open(mo_filename, "rb").read()
-                     break
-                except IOError:
-                     pass
-            else:
-                return # assume C locale
-            # Decode the header of the .mo file (5 little endian 32 bit words):
-            if _decode_Word(buffer[:4]) != self.__MO_MAGIC :
-                raise error, '%s seems not be a valid .mo file' % mo_filename
-            self.mo_version = _decode_Word(buffer[4:8])
-            num_messages    = _decode_Word(buffer[8:12])
-            master_index    = _decode_Word(buffer[12:16])
-            transl_index    = _decode_Word(buffer[16:20])
-            buf_len = len(buffer)
-            # now put all messages from the .mo file buffer in the catalog dict:
-            for i in xrange(0, num_messages):
-                start_master= _decode_Word(buffer[master_index+4:master_index+8])
-                end_master  = start_master + \
-                              _decode_Word(buffer[master_index:master_index+4])
-                start_transl= _decode_Word(buffer[transl_index+4:transl_index+8])
-                end_transl  = start_transl + \
-                              _decode_Word(buffer[transl_index:transl_index+4])
-                if end_master <= buf_len and end_transl <= buf_len:
-                    self.catalog[buffer[start_master:end_master]]=\
-                                 buffer[start_transl:end_transl]
-                else: 
-                    raise error, ".mo file '%s' is corrupt" % mo_filename
-                # advance to the next entry in seek tables:
-                master_index += 8
-                transl_index += 8
-
-        def gettext(self, message):
-            """return the translation of a given message"""
-            try:
-                return self.catalog[message]
-            except KeyError:
-                return message
-        # _MoDict instances may be also accessed using mo[msg] or mo(msg):
-        __getitem = gettext
-        __call__ = gettext
-
-    def textdomain(domain=_default_domain):
-        """Sets the 'domain' to be used by this program. Defaults to 'python'"""
-        global _default_mo
-        if not _loaded_mos.has_key(domain):
-             _loaded_mos[domain] = _MoDict(domain)
-        _default_mo = _loaded_mos[domain]
-
-    def bindtextdomain(domain, localedir=_default_localedir):
-        global _default_mo
-        if not _loaded_mos.has_key(domain):
-            _loaded_mos[domain] = _MoDict(domain, localedir)
-        if _default_mo is not None: 
-            _default_mo = _loaded_mos[domain]
-
-    def translator(domain=_default_domain, localedir=_default_localedir):
-        """returns a gettext compatible function object
-        
-           which is bound to the domain given as parameter"""
-        pass  # TODO implement this 
-
-def _testdriver(argv):
-    message   = ""
-    domain    = _default_domain
-    localedir = _default_localedir
-    if len(argv) > 1:
-        message = argv[1]
-        if len(argv) > 2:
-            domain = argv[2]
-            if len(argv) > 3:
-                localedir = argv[3]
-    # now perform some testing of this module:
-    bindtextdomain(domain, localedir)
-    textdomain(domain)
-    info = gettext('')  # this is where special info is often stored
-    if info:
-        print ".mo file for domain %s in %s contains:" % (domain, localedir)
-        print info
-    else:
-        print ".mo file contains no info"
-    if message:
-        print "Translation of '"+ message+ "' is '"+ _(message)+ "'"
-    else:
-        for msg in ("Cancel", "No", "OK", "Quit", "Yes"):
-            print "Translation of '"+ msg + "' is '"+ _(msg)+ "'"
-
-if __name__ == '__main__':
-    import sys
-    if len(sys.argv) > 1 and (sys.argv[1] == "-h" or sys.argv[1] == "-?"):
-        print "Usage :", sys.argv[0], "[ MESSAGE [ DOMAIN [ LOCALEDIR ]]]"
-    _testdriver(sys.argv)
--- a/linkcheck/robotparser2.py
+++ b/linkcheck/robotparser2.py
@ -1,250 +0,0 @@
-""" implements the robots.txt inclusion protocol
-
-    Copyright (C) 2000  Bastian Kleineidam
-
-    You can choose between two licenses when using this package:
-    1) GNU GPLv2
-    2) PYTHON 2.0 OPEN SOURCE LICENSE
-
-    The robots.txt Exclusion Protocol is implemented as specified in
-    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
-"""
-import re,string,urlparse,urllib
-
-__all__ = ['RobotFileParser']
-
-debug = 0
-
-def _debug(msg):
-    if debug: print msg
-
-
-class RobotFileParser:
-    def __init__(self, url=''):
-        self.entries = []
-        self.disallow_all = 0
-        self.allow_all = 0
-        self.set_url(url)
-        self.last_checked = 0
-
-    def mtime(self):
-        return self.last_checked
-
-    def modified(self):
-        import time
-        self.last_checked = time.time()
-
-    def set_url(self, url):
-        self.url = url
-        self.host, self.path = urlparse.urlparse(url)[1:3]
-
-    def read(self):
-        import httplib
-        tries = 0
-        # limit number of redirections to 5
-        while tries<5:
-            connection = httplib.HTTP(self.host)
-            connection.putrequest("GET", self.path)
-            connection.putheader("Host", self.host)
-            connection.endheaders()
-            status, text, mime = connection.getreply()
-            if status in [301,302] and mime:
-                tries = tries + 1
-                newurl = mime.get("Location", mime.get("Uri", ""))
-                newurl = urlparse.urljoin(self.url, newurl)
-                self.set_url(newurl)
-            else:
-                break
-        if status==401 or status==403:
-            self.disallow_all = 1
-        elif status>=400:
-            self.allow_all = 1
-        else:
-	    # status < 400
-            self.parse(connection.getfile().readlines())
-
-    def parse(self, lines):
-        """parse the input lines from a robot.txt file.
-	   We allow that a user-agent: line is not preceded by
-	   one or more blank lines."""
-        state = 0
-        linenumber = 0
-        entry = Entry()
-        
-        for line in lines:
-            line = string.strip(line)
-            linenumber = linenumber + 1
-            if not line:
-                if state==1:
-                    _debug("line %d: warning: you should insert"
-		           " allow: or disallow: directives below any"
-			   " user-agent: line" % linenumber)
-                    entry = Entry()
-                    state = 0
-                elif state==2:
-                    self.entries.append(entry)
-                    entry = Entry()
-                    state = 0
-            # remove optional comment and strip line
-            i = string.find(line, '#')
-            if i>=0:
-                line = line[:i]
-            line = string.strip(line)
-            if not line:
-                continue
-            line = string.split(line, ':', 1)
-            if len(line) == 2:
-                line[0] = string.lower(string.strip(line[0]))
-                line[1] = string.strip(line[1])
-                if line[0] == "user-agent":
-                    if state==2:
-                        _debug("line %d: warning: you should insert a blank"
-			       " line before any user-agent"
-                               " directive" % linenumber)
-                        self.entries.append(entry)
-                        entry = Entry()
-                    entry.useragents.append(line[1])
-                    state = 1
-                elif line[0] == "disallow":
-                    if state==0:
-                        _debug("line %d: error: you must insert a user-agent:"
-			       " directive before this line" % linenumber)
-                    else:
-                        entry.rulelines.append(RuleLine(line[1], 0))
-                        state = 2
-                elif line[0] == "allow":
-                    if state==0:
-                        _debug("line %d: error: you must insert a user-agent:"
-			       " directive before this line" % linenumber)
-                    else:
-                        entry.rulelines.append(RuleLine(line[1], 1))
-                else:
-                    _debug("line %d: warning: unknown key %s" % (linenumber,
-                               line[0]))
-            else:
-                _debug("line %d: error: malformed line %s"%(linenumber, line))
-        if state==2:
-            self.entries.append(entry)
-        _debug("Parsed rules:\n%s" % str(self))
-
-
-    def can_fetch(self, useragent, url):
-        """using the parsed robots.txt decide if useragent can fetch url"""
-        _debug("Checking robot.txt allowance for\n%s\n%s" % (useragent, url))
-        if self.disallow_all:
-            return 0
-        if self.allow_all:
-            return 1
-        # search for given user agent matches
-        # the first match counts
-        url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
-        for entry in self.entries:
-            if entry.applies_to(useragent):
-                return entry.allowance(url)
-        # agent not found ==> access granted
-        return 1
-
-
-    def __str__(self):
-        ret = ""
-        for entry in self.entries:
-            ret = ret + str(entry) + "\n"
-        return ret
-
-
-class RuleLine:
-    """A rule line is a single "Allow:" (allowance==1) or "Disallow:"
-       (allowance==0) followed by a path."""
-    def __init__(self, path, allowance):
-        self.path = urllib.quote(path)
-        self.allowance = allowance
-
-    def applies_to(self, filename):
-        return self.path=="*" or re.match(self.path, filename)
-
-    def __str__(self):
-        return (self.allowance and "Allow" or "Disallow")+": "+self.path
-
-
-class Entry:
-    """An entry has one or more user-agents and zero or more rulelines"""
-    def __init__(self):
-        self.useragents = []
-        self.rulelines = []
-
-    def __str__(self):
-        ret = ""
-        for agent in self.useragents:
-            ret = ret + "User-agent: "+agent+"\n"
-        for line in self.rulelines:
-            ret = ret + str(line) + "\n"
-        return ret
-
-    def applies_to(self, useragent):
-        """check if this entry applies to the specified agent"""
-        # split the name token and make it lower case
-        useragent = string.lower(string.split(useragent,"/")[0])
-        for agent in self.useragents:
-            if agent=='*':
-                # we have the catch-all agent
-                return 1
-            agent = string.lower(agent)
-            # don't forget to re.escape
-            if re.search(re.escape(useragent), agent):
-                return 1
-        return 0
-
-    def allowance(self, filename):
-        """Preconditions:
-        - our agent applies to this entry
-        - filename is URL decoded"""
-        for line in self.rulelines:
-            _debug((filename, str(line), line.allowance))
-            if line.applies_to(filename):
-                return line.allowance
-        return 1
-
-def _check(a,b):
-    if a!=b:
-        print "failed\n"
-    else:
-        print "ok\n"
-
-def _test():
-    global debug
-    import sys
-    rp = RobotFileParser()
-    debug = 1
-    if len(sys.argv) <= 1:
-        rp.set_url('http://www.musi-cal.com/robots.txt')
-        rp.read()
-    else:
-        rp.parse(open(sys.argv[1]).readlines())
-    # test for re.escape
-    _check(rp.can_fetch('*', 'http://www.musi-cal.com/'), 1)
-    # empty url path
-    _check(rp.can_fetch('*', 'http://www.musi-cal.com'), 1)
-    # this should match the first rule, which is a disallow
-    _check(rp.can_fetch('', 'http://www.musi-cal.com/'), 0)
-    # various cherry pickers
-    _check(rp.can_fetch('CherryPickerSE',
-                       'http://www.musi-cal.com/cgi-bin/event-search'
-		       '?city=San+Francisco'), 0)
-    _check(rp.can_fetch('CherryPickerSE/1.0',
-                       'http://www.musi-cal.com/cgi-bin/event-search'
-		       '?city=San+Francisco'), 0)
-    _check(rp.can_fetch('CherryPickerSE/1.5',
-                       'http://www.musi-cal.com/cgi-bin/event-search'
-		       '?city=San+Francisco'), 0)
-    # case sensitivity
-    _check(rp.can_fetch('ExtractorPro', 'http://www.musi-cal.com/blubba'), 0)
-    _check(rp.can_fetch('extractorpro', 'http://www.musi-cal.com/blubba'), 0)
-    # substring test
-    _check(rp.can_fetch('toolpak/1.1', 'http://www.musi-cal.com/blubba'), 0)
-    # tests for catch-all * agent
-    _check(rp.can_fetch('spam', 'http://www.musi-cal.com/musician/me'), 0)
-    _check(rp.can_fetch('spam', 'http://www.musi-cal.com/Musician/me'), 1)
-    _check(rp.can_fetch('spam', 'http://www.musi-cal.com/'), 1)
-
-if __name__ == '__main__':
-    _test()
--- a/linkchecker.bat
+++ b/linkchecker.bat
@ -19,4 +19,9 @@ rem uncomment the next line to enable german output
 rem set LC_MESSAGES=de
 rem uncomment the next line to enable french output
 rem set LC_MESSAGES=fr
+
+rem If you see $python or $install_scripts on the next line, then you
+rem are looking at a skeleton .bat file suited only for installation.
+rem Look in c:\python21\scripts or wherever Python is installed for
+rem the executable .bat file.
 $python -O linkchecker --interactive %*
--- a/po/Makefile
+++ b/po/Makefile
@ -6,8 +6,10 @@ MSGFMT=$(PYTHON) $(I18NTOOLS)/msgfmt.py
 #MSGFMT=msgfmt
 MSGMERGE=msgmerge
 SOURCES=\
+../linkcheck/ChromeUrlData.py \
 ../linkcheck/Config.py \
 ../linkcheck/FileUrlData.py \
+../linkcheck/FindUrlData.py \
 ../linkcheck/FtpUrlData.py \
 ../linkcheck/GopherUrlData.py \
 ../linkcheck/HostCheckingUrlData.py \
--- a/po/msgfmt.py
+++ b/po/msgfmt.py
@ -1,4 +1,4 @@
-#! /usr/bin/env python2
+#!/usr/bin/python

 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

@ -11,6 +11,11 @@ GNU msgfmt program, however, it is a simpler implementation.
 Usage: msgfmt.py [OPTIONS] filename.po

 Options:
+    -o file
+    --output-file=file
+        Specify the output file to write to.  If omitted, output will go to a
+        file named filename.mo (based off the input file name).
+
    -h
    --help
        Print this message and exit.
@ -18,15 +23,15 @@ Options:
    -V
    --version
        Display version information and exit.
-
 """

 import sys
+import os
 import getopt
 import struct
 import array

-__version__ = "1.0"
+__version__ = "1.1"

 MESSAGES = {}

@ -91,17 +96,18 @@ def generate():



-def make(filename):
+def make(filename, outfile):
    ID = 1
    STR = 2

-    # Compute .mo name from .po name
+    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
-        outfile = filename[:-2] + 'mo'
    else:
        infile = filename + '.po'
-        outfile = filename + '.mo'
+    if outfile is None:
+        outfile = os.path.splitext(infile)[0] + '.mo'
+
    try:
        lines = open(infile).readlines()
    except IOError, msg:
@ -159,7 +165,6 @@ def make(filename):
    # Compute output
    output = generate()

-    # Save output
    try:
        open(outfile,"wb").write(output)
    except IOError,msg:
@ -169,10 +174,12 @@ def make(filename):

 def main():
    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hV', ['help','version'])
+        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
+                                   ['help', 'version', 'output-file='])
    except getopt.error, msg:
        usage(1, msg)

+    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
@ -180,6 +187,8 @@ def main():
        elif opt in ('-V', '--version'):
            print >> sys.stderr, "msgfmt.py", __version__
            sys.exit(0)
+        elif opt in ('-o', '--output-file'):
+            outfile = arg
    # do it
    if not args:
        print >> sys.stderr, 'No input file given'
@ -187,7 +196,7 @@ def main():
        return

    for filename in args:
-        make(filename)
+        make(filename, outfile)


 if __name__ == '__main__':
--- a/po/pygettext.py
+++ b/po/pygettext.py
@ -1,18 +1,13 @@
-#! /usr/bin/env python2
+#!/usr/bin/python
 # Originally written by Barry Warsaw <bwarsaw@python.org>
 #
 # minimally patched to make it even more xgettext compatible 
 # by Peter Funk <pf@artcom-gmbh.de>
+#
+# patched even more minimally to fix the default-domain= option
+# by Bastian Kleineidam <calvin@users.sourceforge.net>

-# for selftesting
-try:
-    import fintl
-    _ = fintl.gettext
-except ImportError:
-    def _(s): return s
-
-
-__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
+"""pygettext -- Python equivalent of xgettext(1)

 Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
 internationalization of C programs.  Most of these tools are independent of
@ -65,7 +60,13 @@ Options:

    -E
    --escape
-        replace non-ASCII characters with octal escape sequences.
+        Replace non-ASCII characters with octal escape sequences.
+
+    -D
+    --docstrings
+        Extract module, class, method, and function docstrings.  These do not
+        need to be wrapped in _() markers, and in fact cannot be for Python to
+        consider them docstrings.

    -h
    --help
@ -93,6 +94,15 @@ Options:
        each msgid.  The style of comments is controlled by the -S/--style
        option.  This is the default.

+    -o filename
+    --output=filename
+        Rename the default output file from messages.pot to filename.  If
+        filename is `-' then the output is sent to standard out.
+
+    -p dir
+    --output-dir=dir
+        Output files will be placed in directory dir.
+
    -S stylename
    --style stylename
        Specify which style to use for location comments.  Two styles are
@ -103,15 +113,6 @@ Options:

        The style name is case insensitive.  GNU style is the default.

-    -o filename
-    --output=filename
-        Rename the default output file from messages.pot to filename.  If
-        filename is `-' then the output is sent to standard out.
-
-    -p dir
-    --output-dir=dir
-        Output files will be placed in directory dir.
-
    -v
    --verbose
        Print the names of the files being processed.
@ -132,7 +133,7 @@ Options:

 If `inputfile' is -, standard input is read.

-""")
+"""

 import os
 import sys
@ -140,7 +141,14 @@ import time
 import getopt
 import tokenize

-__version__ = '1.1'
+# for selftesting
+try:
+    import fintl
+    _ = fintl.gettext
+except ImportError:
+    def _(s): return s
+
+__version__ = '1.3'

 default_keywords = ['_']
 DEFAULTKEYWORDS = ', '.join(default_keywords)
@ -159,7 +167,8 @@ pot_header = _('''\
 msgid ""
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\\n"
-"PO-Revision-Date: %(time)s\\n"
+"POT-Creation-Date: %(time)s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
 "Language-Team: LANGUAGE <LL@li.org>\\n"
 "MIME-Version: 1.0\\n"
@ -171,9 +180,9 @@ msgstr ""


 def usage(code, msg=''):
-    print __doc__ % globals()
+    print >> sys.stderr, _(__doc__) % globals()
    if msg:
-        print msg
+        print >> sys.stderr, msg
    sys.exit(code)


@ -239,15 +248,48 @@ class TokenEater:
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
+        self.__freshmodule = 1

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch
+##        import token
+##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
+##              'tstring:', tstring
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
+        # Do docstring extractions, if enabled
+        if self.__options.docstrings:
+            # module docstring?
+            if self.__freshmodule:
+                if ttype == tokenize.STRING:
+                    self.__addentry(safe_eval(tstring), lineno)
+                    self.__freshmodule = 0
+                elif ttype not in (tokenize.COMMENT, tokenize.NL):
+                    self.__freshmodule = 0
+                return
+            # class docstring?
+            if ttype == tokenize.NAME and tstring in ('class', 'def'):
+                self.__state = self.__suiteseen
+                return
        if ttype == tokenize.NAME and tstring in self.__options.keywords:
            self.__state = self.__keywordseen

+    def __suiteseen(self, ttype, tstring, lineno):
+        # ignore anything until we see the colon
+        if ttype == tokenize.OP and tstring == ':':
+            self.__state = self.__suitedocstring
+
+    def __suitedocstring(self, ttype, tstring, lineno):
+        # ignore any intervening noise
+        if ttype == tokenize.STRING:
+            self.__addentry(safe_eval(tstring), lineno)
+            self.__state = self.__waiting
+        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
+                           tokenize.COMMENT):
+            # there was no class docstring
+            self.__state = self.__waiting
+
    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
@ -263,58 +305,54 @@ class TokenEater:
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
-                msg = EMPTYSTRING.join(self.__data)
-                if not msg in self.__options.toexclude:
-                    entry = (self.__curfile, self.__lineno)
-                    linenos = self.__messages.get(msg)
-                    if linenos is None:
-                        self.__messages[msg] = [entry]
-                    else:
-                        linenos.append(entry)
+                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        # TBD: should we warn if we seen anything else?

+    def __addentry(self, msg, lineno=None):
+        if lineno is None:
+            lineno = self.__lineno
+        if not msg in self.__options.toexclude:
+            entry = (self.__curfile, lineno)
+            self.__messages.setdefault(msg, []).append(entry)
+
    def set_filename(self, filename):
        self.__curfile = filename

    def write(self, fp):
        options = self.__options
        timestamp = time.ctime(time.time())
-        # common header
-        try:
-            sys.stdout = fp
-            # The time stamp in the header doesn't have the same format
-            # as that generated by xgettext...
-            print pot_header % {'time': timestamp, 'version': __version__}
-            for k, v in self.__messages.items():
-                if not options.writelocations:
-                    pass
-                # location comments are different b/w Solaris and GNU:
-                elif options.locationstyle == options.SOLARIS:
-                    for filename, lineno in v:
-                        d = {'filename': filename, 'lineno': lineno}
-                        print _('# File: %(filename)s, line: %(lineno)d') % d
-                elif options.locationstyle == options.GNU:
-                    # fit as many locations on one line, as long as the
-                    # resulting line length doesn't exceeds 'options.width'
-                    locline = '#:'
-                    for filename, lineno in v:
-                        d = {'filename': filename, 'lineno': lineno}
-                        s = _(' %(filename)s:%(lineno)d') % d
-                        if len(locline) + len(s) <= options.width:
-                            locline = locline + s
-                        else:
-                            print locline
-                            locline = "#:" + s
-                    if len(locline) > 2:
-                        print locline
-                # TBD: sorting, normalizing
-                print 'msgid', normalize(k)
-                print 'msgstr ""\n'
-        finally:
-            sys.stdout = sys.__stdout__
+        # The time stamp in the header doesn't have the same format as that
+        # generated by xgettext...
+        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
+        for k, v in self.__messages.items():
+            if not options.writelocations:
+                pass
+            # location comments are different b/w Solaris and GNU:
+            elif options.locationstyle == options.SOLARIS:
+                for filename, lineno in v:
+                    d = {'filename': filename, 'lineno': lineno}
+                    print >>fp, _('# File: %(filename)s, line: %(lineno)d') % d
+            elif options.locationstyle == options.GNU:
+                # fit as many locations on one line, as long as the
+                # resulting line length doesn't exceed 'options.width'
+                locline = '#:'
+                for filename, lineno in v:
+                    d = {'filename': filename, 'lineno': lineno}
+                    s = _(' %(filename)s:%(lineno)d') % d
+                    if len(locline) + len(s) <= options.width:
+                        locline = locline + s
+                    else:
+                        print >> fp, locline
+                        locline = "#:" + s
+                if len(locline) > 2:
+                    print >> fp, locline
+            # TBD: sorting, normalizing
+            print >> fp, 'msgid', normalize(k)
+            print >> fp, 'msgstr ""\n'
+


 def main():
@ -322,11 +360,12 @@ def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
-            'ad:Ehk:Kno:p:S:Vvw:x:',
+            'ad:DEhk:Kno:p:S:Vvw:x:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
+             'docstrings',
             ])
    except getopt.error, msg:
        usage(1, msg)
@ -347,6 +386,7 @@ def main():
        verbose = 0
        width = 78
        excludefilename = ''
+        docstrings = 0

    options = Options()
    locations = {'gnu' : options.GNU,
@ -363,6 +403,8 @@ def main():
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
+        elif opt in ('-D', '--docstrings'):
+            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
@ -426,7 +468,11 @@ def main():
            closep = 1
        try:
            eater.set_filename(filename)
-            tokenize.tokenize(fp.readline, eater)
+            try:
+                tokenize.tokenize(fp.readline, eater)
+            except tokenize.TokenError, e:
+                sys.stderr.write('%s: %s, line %d, column %d\n' %
+                                 (e[0], filename, e[1][0], e[1][1]))
        finally:
            if closep:
                fp.close()
--- a/setup.py
+++ b/setup.py
@ -94,12 +94,14 @@ class MyDistribution(Distribution):
        data = []
 	data.append('config_dir = %s' % `os.path.join(cwd, "config")`)
        data.append("install_data = %s" % `cwd`)
-        self.create_conf_file(".", data)
+        self.create_conf_file("", data)
        Distribution.run_commands(self)


    def create_conf_file(self, directory, data=[]):
        data.insert(0, "# this file is automatically created by setup.py")
+        if not directory:
+            directory = os.getcwd()
        filename = os.path.join(directory, self.config_file)
        # add metadata
        metanames = ("name", "version", "author", "author_email",
@ -127,7 +129,7 @@ myname = "Bastian Kleineidam"
 myemail = "calvin@users.sourceforge.net"

 setup (name = "linkchecker",
-       version = "1.3.10",
+       version = "1.3.11",
       description = "check HTML documents for broken links",
       author = myname,
       author_email = myemail,
--- a/test/output/test_http
+++ b/test/output/test_http
@ -3,9 +3,11 @@ url file:///home/calvin/projects/linkchecker/test/html/http.html
 valid
 url http://www.garantiertnixgutt.bla
 name bad url
+warning Missing '/' at end of URL
 error
 url http://www.heise.de
 name ok
+warning Missing '/' at end of URL
 valid
 url http:/www.heise.de
 name one slash
@ -38,10 +40,12 @@ valid
 url HtTP://WWW.hEIsE.DE
 cached
 name should be cached
+warning Missing '/' at end of URL
 valid
 url HTTP://WWW.HEISE.DE
 cached
 name should be cached
+warning Missing '/' at end of URL
 valid
 url http://www.heise.de/?quoted=ü
 name html entities