documentation

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2160 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2005-01-19 01:04:38 +00:00
parent 46de8b6ca0
commit b008747f39
14 changed files with 326 additions and 139 deletions

View file

@ -32,7 +32,9 @@ import linkcheck.threader
def _check_morsel (m, host, path):
"""check given cookie morsel against the desired host and path"""
"""
Check given cookie morsel against the desired host and path.
"""
# check domain (if its stored)
if m["domain"] and not host.endswith(m["domain"]):
return None
@ -48,15 +50,18 @@ def _check_morsel (m, host, path):
class Cache (object):
"""Store and provide routines for cached data. Currently there are
caches for cookies, checked urls, FTP connections and robots.txt
contents.
"""
Store and provide routines for cached data. Currently there are
caches for cookies, checked urls, FTP connections and robots.txt
contents.
All public operations (except __init__()) are thread-safe.
All public operations (except __init__()) are thread-safe.
"""
def __init__ (self):
"""Initialize the default options"""
"""
Initialize the default options.
"""
# one big lock for all caches and queues
self.lock = threading.Lock()
# already checked urls
@ -81,9 +86,11 @@ class Cache (object):
self.lock.release()
def incoming_get_url (self):
"""Get first not-in-progress url from the incoming queue and
return it. If no such url is available return None. The
url might be already cached."""
"""
Get first not-in-progress url from the incoming queue and
return it. If no such url is available return None. The
url might be already cached.
"""
self.lock.acquire()
try:
for i, url_data in enumerate(self.incoming):
@ -102,7 +109,9 @@ class Cache (object):
self.lock.release()
def incoming_len (self):
"""return number of entries in incoming queue"""
"""
Return number of entries in incoming queue.
"""
self.lock.acquire()
try:
return len(self.incoming)
@ -110,7 +119,9 @@ class Cache (object):
self.lock.release()
def incoming_add (self, url_data):
"""add a new URL to list of URLs to check"""
"""
Add a new URL to list of URLs to check.
"""
self.lock.acquire()
try:
linkcheck.log.debug(linkcheck.LOG_CACHE,
@ -171,10 +182,12 @@ class Cache (object):
self.lock.release()
def checked_redirect (self, redirect, url_data):
"""Check if redirect is already in cache. Used for URL redirections
to avoid double checking of already cached URLs.
If the redirect URL is found in the cache, the result data is
already copied."""
"""
Check if redirect is already in cache. Used for URL redirections
to avoid double checking of already cached URLs.
If the redirect URL is found in the cache, the result data is
already copied.
"""
self.lock.acquire()
try:
if redirect in self.checked:
@ -185,7 +198,9 @@ class Cache (object):
self.lock.release()
def robots_txt_allows_url (self, roboturl, url, user, password):
"""ask robots.txt allowance"""
"""
Ask robots.txt allowance.
"""
self.lock.acquire()
try:
if roboturl not in self.robots_txt:
@ -201,8 +216,9 @@ class Cache (object):
self.lock.release()
def get_ftp_connection (self, host, username, password):
"""Get open FTP connection to given host. Return None if no such
connection is available.
"""
Get open FTP connection to given host. Return None if no such
connection is available.
"""
self.lock.acquire()
try:
@ -218,7 +234,9 @@ class Cache (object):
self.lock.release()
def add_ftp_connection (self, host, username, password, conn):
"""Store open FTP connection into cache for reuse."""
"""
Store open FTP connection into cache for reuse.
"""
self.lock.acquire()
try:
key = (host, username, password)
@ -230,7 +248,9 @@ class Cache (object):
self.lock.release()
def release_ftp_connection (self, host, username, password):
"""Store open FTP connection into cache for reuse."""
"""
Store open FTP connection into cache for reuse.
"""
self.lock.acquire()
try:
key = (host, username, password)
@ -239,8 +259,9 @@ class Cache (object):
self.lock.release()
def store_cookies (self, headers, host):
"""Thread-safe cookie cache setter function. Can raise the
exception Cookie.CookieError.
"""
Thread-safe cookie cache setter function. Can raise the
exception Cookie.CookieError.
"""
self.lock.acquire()
try:
@ -255,7 +276,9 @@ class Cache (object):
self.lock.release()
def get_cookies (self, host, path):
"""Thread-safe cookie cache getter function."""
"""
Thread-safe cookie cache getter function.
"""
self.lock.acquire()
try:
linkcheck.log.debug(linkcheck.LOG_CACHE,

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""url consumer class"""
"""
Url consumer class.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -28,10 +30,14 @@ import linkcheck.log
from urlbase import stderr
class Consumer (object):
"""consume urls from the url queue in a threaded manner"""
"""
Consume urls from the url queue in a threaded manner.
"""
def __init__ (self, config, cache):
"""initialize consumer data and threads"""
"""
Initialize consumer data and threads.
"""
self.config = config
self.cache = cache
self.threader = linkcheck.threader.Threader()
@ -47,7 +53,9 @@ class Consumer (object):
self.warnings = False
def _set_threads (self, num):
"""set number of checker threads to start"""
"""
Set number of checker threads to start.
"""
linkcheck.log.debug(linkcheck.LOG_CHECK,
"set threading with %d threads", num)
self.threader.threads_max = num
@ -57,7 +65,9 @@ class Consumer (object):
sys.setcheckinterval(100)
def append_url (self, url_data):
"""append url to incoming check list"""
"""
Append url to incoming check list.
"""
if not self.cache.incoming_add(url_data):
# can be logged
self.logger_new_url(url_data)
@ -78,7 +88,9 @@ class Consumer (object):
self.threader.start_thread(url_data.check, ())
def checked (self, url_data):
"""put checked url in cache and log it"""
"""
Put checked url in cache and log it.
"""
# log before putting it in the cache (otherwise we would see
# a "(cached)" after every url
self.logger_new_url(url_data)
@ -88,11 +100,15 @@ class Consumer (object):
self.cache.in_progress_remove(url_data)
def interrupted (self, url_data):
"""remove url from active list"""
"""
Remove url from active list.
"""
self.cache.in_progress_remove(url_data)
def finished (self):
"""return True if checking is finished"""
"""
Return True if checking is finished.
"""
self.lock.acquire()
try:
return self.threader.finished() and \
@ -101,7 +117,9 @@ class Consumer (object):
self.lock.release()
def no_more_threads (self):
"""return True if no more active threads are running"""
"""
Return True if no more active threads are running.
"""
self.lock.acquire()
try:
return self.threader.finished()
@ -109,7 +127,9 @@ class Consumer (object):
self.lock.release()
def abort (self):
"""abort checking and send of-of-output message to logger"""
"""
Abort checking and send end-of-output message to logger.
"""
while not self.no_more_threads():
linkcheck.log.warn(linkcheck.LOG_CHECK,
_("keyboard interrupt; waiting for %d active threads to finish"),
@ -123,7 +143,9 @@ class Consumer (object):
self.logger_end_output()
def print_status (self, curtime, start_time):
"""print check status looking at url queues"""
"""
Print check status looking at url queues.
"""
self.lock.acquire()
try:
active = self.threader.active_threads()
@ -137,7 +159,9 @@ class Consumer (object):
self.lock.release()
def logger_start_output (self):
"""start output of all configured loggers"""
"""
Start output of all configured loggers.
"""
self.lock.acquire()
try:
self.logger.start_output()
@ -147,7 +171,9 @@ class Consumer (object):
self.lock.release()
def logger_new_url (self, url_data):
"""send new url to all configured loggers"""
"""
Send new url to all configured loggers.
"""
self.lock.acquire()
try:
self.linknumber += 1
@ -168,7 +194,9 @@ class Consumer (object):
# self.filter_queue(self)
def logger_end_output (self):
"""end output of all configured loggers"""
"""
End output of all configured loggers.
"""
self.lock.acquire()
try:
self.logger.end_output(linknumber=self.linknumber)
@ -178,7 +206,9 @@ class Consumer (object):
self.lock.release()
def active_threads (self):
"""return number of active threads"""
"""
Return number of active threads.
"""
self.lock.acquire()
try:
return self.threader.active_threads()

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle for unknown links"""
"""
Handle for unknown links.
"""
# Copyright (C) 2001-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -21,7 +23,9 @@ import urlbase
import linkcheck
class ErrorUrl (urlbase.UrlBase):
"""Unknown URL links"""
"""
Unknown URL links.
"""
def check_syntax (self):
linkcheck.log.debug(linkcheck.LOG_CHECK, "checking syntax")
@ -31,6 +35,8 @@ class ErrorUrl (urlbase.UrlBase):
return False
def set_cache_keys (self):
"""cache key is forbidden"""
"""
Cache key is forbidden.
"""
raise NotImplementedError, "cache keys are forbidden"

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle local file: links"""
"""
Handle local file: links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -37,8 +39,9 @@ contents = {
def get_files (dirname):
"""Get lists of files in directory. Does only allow regular files
and directories, no symlinks.
"""
Get lists of files in directory. Does only allow regular files
and directories, no symlinks.
"""
files = []
for entry in os.listdir(dirname):
@ -51,7 +54,9 @@ def get_files (dirname):
def get_nt_filename (path):
"""return case sensitive filename for NT path"""
"""
Return case sensitive filename for NT path.
"""
head, tail = os.path.split(path)
if not tail:
return path
@ -64,7 +69,9 @@ def get_nt_filename (path):
class FileUrl (urlbase.UrlBase):
"Url link with file scheme"
"""
Url link with file scheme.
"""
def __init__ (self, base_url, recursion_level, consumer,
parent_url = None,

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle FTP links"""
"""
Handle FTP links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -28,7 +30,9 @@ import httpurl
class FtpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
"""Url link with ftp scheme."""
"""
Url link with ftp scheme.
"""
def __init__ (self, base_url, recursion_level, consumer,
parent_url = None,
@ -75,7 +79,9 @@ class FtpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
return super(FtpUrl, self).get_user_password()
def login (self):
"""log into ftp server and check the welcome message"""
"""
Log into ftp server and check the welcome message.
"""
_user, _password = self.get_user_password()
# ready to connect
conn = self.consumer.cache.get_ftp_connection(
@ -111,8 +117,9 @@ class FtpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
self.urlparts[1], _user, _password, self.url_connection)
def cwd (self):
"""Change to URL parent directory. Return filename of last path
component.
"""
Change to URL parent directory. Return filename of last path
component.
"""
dirname = self.urlparts[2].strip('/')
dirs = dirname.split('/')
@ -123,7 +130,9 @@ class FtpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
return filename
def listfile (self):
"""see if filename is in the current FTP directory"""
"""
See if filename is in the current FTP directory.
"""
if not self.filename:
return
files = self.get_files()
@ -141,8 +150,9 @@ class FtpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
raise ftplib.error_perm, "550 File not found"
def get_files (self):
"""Get list of filenames in directory. Subdirectories have an
ending slash.
"""
Get list of filenames in directory. Subdirectories have an
ending slash.
"""
# Rudimentary LIST output parsing. An entry is assumed to have
# the following form:

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle Gopher links"""
"""
Handle Gopher links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -19,5 +21,7 @@
import urlbase
class GopherUrl (urlbase.UrlBase):
"Url link with gopher scheme"
"""
Url link with gopher scheme.
"""
pass

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle https links"""
"""
Handle https links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -19,10 +21,14 @@
import httpurl
class HttpsUrl (httpurl.HttpUrl):
"""Url link with https scheme"""
"""
Url link with https scheme.
"""
def local_check (self):
"""check connection if SSL is supported, else ignore"""
"""
Check connection if SSL is supported, else ignore.
"""
if httpurl.supportHttps:
super(HttpsUrl, self).local_check()
else:

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle http links"""
"""
Handle http links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -43,11 +45,15 @@ _is_amazon = re.compile(r'^www\.amazon\.(com|de|ca|fr|co\.(uk|jp))').search
class HttpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
"Url link with http scheme"
"""
Url link with http scheme.
"""
def __init__ (self, base_url, recursion_level, consumer,
parent_url=None, base_ref=None, line=0, column=0, name=u""):
"""initialize basic url data and HTTP specific variables"""
"""
Initialize basic url data and HTTP specific variables.
"""
super(HttpUrl, self).__init__(base_url, recursion_level, consumer,
parent_url=parent_url, base_ref=base_ref, line=line,
column=column, name=name)
@ -348,8 +354,9 @@ class HttpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
self.add_info(_("Last modified %s.") % modified)
def _get_http_response (self):
"""Put request and return (status code, status text, mime object).
Host can be host:port format.
"""
Put request and return (status code, status text, mime object).
Host can be host:port format.
"""
if self.proxy:
host = self.proxy

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle for uncheckable application-specific links"""
"""
Handle for uncheckable application-specific links.
"""
# Copyright (C) 2001-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -19,7 +21,9 @@
import urlbase
class IgnoredUrl (urlbase.UrlBase):
"""Some schemes are defined in http://www.w3.org/Addressing/schemes"""
"""
Some schemes are defined in <http://www.w3.org/Addressing/schemes>.
"""
def local_check (self):
self.add_warning(_("%s URL ignored.") % self.scheme.capitalize())

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle for mailto: links"""
"""
Handle for mailto: links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -29,7 +31,9 @@ import linkcheck.dns.resolver
class MailtoUrl (urlbase.UrlBase):
"""Url link with mailto scheme"""
"""
Url link with mailto scheme.
"""
def build_url (self):
super(MailtoUrl, self).build_url()
@ -83,7 +87,8 @@ class MailtoUrl (urlbase.UrlBase):
return addrs
def check_connection (self):
"""Verify a list of email addresses. If one address fails,
"""
Verify a list of email addresses. If one address fails,
the whole list will fail.
For each mail address we check the following things:
(1) Look up the MX DNS records. If we found no MX record,
@ -103,7 +108,9 @@ class MailtoUrl (urlbase.UrlBase):
def check_smtp_domain (self, name, mail):
"""Check a single mail address"""
"""
Check a single mail address.
"""
linkcheck.log.debug(linkcheck.LOG_CHECK,
"checking mail address %r", mail)
linkcheck.log.debug(linkcheck.LOG_CHECK, "splitting address")
@ -131,7 +138,9 @@ class MailtoUrl (urlbase.UrlBase):
self.check_smtp_connect(mxdata, username)
def check_smtp_connect (self, mxdata, username):
"""mxdata is a list of (preference, host) tuples to check for"""
"""
mxdata is a list of (preference, host) tuples to check for
"""
smtpconnect = 0
for preference, host in mxdata:
try:
@ -180,7 +189,9 @@ class MailtoUrl (urlbase.UrlBase):
_("Could not split the mail address"))
def close_connection (self):
"""close a possibly opened SMTP connection"""
"""
Close a possibly opened SMTP connection.
"""
if self.url_connection is None:
# no connection is open
return
@ -191,7 +202,9 @@ class MailtoUrl (urlbase.UrlBase):
self.url_connection = None
def set_cache_keys (self):
"""The cache key is a comma separated list of emails."""
"""
The cache key is a comma separated list of emails.
"""
emails = [addr[1] for addr in self.addresses]
emails.sort()
self.cache_url_key = u"%s:%s" % (self.scheme, u",".join(emails))
@ -199,7 +212,9 @@ class MailtoUrl (urlbase.UrlBase):
# cache_content_key remains None, recursion is not allowed
def can_get_content (self):
"""mailto: URLs do not have any content
@return False
"""
mailto: URLs do not have any content
@return: C{False}
"""
return False

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle nntp: and news: links"""
"""
Handle nntp: and news: links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -31,11 +33,14 @@ import linkcheck.log
random.seed()
class NoNetrcNNTP (nntplib.NNTP):
"""NNTP class ignoring possible entries in ~/.netrc"""
"""
NNTP class ignoring possible entries in ~/.netrc.
"""
def __init__ (self, host, port=nntplib.NNTP_PORT, user=None,
password=None, readermode=None):
"""Initialize an instance. Arguments:
"""
Initialize an instance. Arguments:
- host: hostname to connect to
- port: port to connect to (default the standard NNTP port)
- user: username to authenticate with
@ -95,7 +100,9 @@ class NoNetrcNNTP (nntplib.NNTP):
class NntpUrl (urlbase.UrlBase):
"""Url link with NNTP scheme"""
"""
Url link with NNTP scheme.
"""
def check_connection (self):
nntpserver = self.host or self.consumer.config["nntpserver"]
@ -124,9 +131,11 @@ class NntpUrl (urlbase.UrlBase):
self.add_warning(_("No newsgroup specified in NNTP URL."))
def _connectNntp (self, nntpserver):
"""This is done only once per checking task. Also, the newly
"""
This is done only once per checking task. Also, the newly
introduced error codes 504 and 505 (both indicating "Too busy, retry
later") are caught."""
later") are caught.
"""
tries = 0
nntp = value = None
while tries < 5:

View file

@ -18,10 +18,14 @@
import urllib
class ProxySupport (object):
"""get support for proxying and for urls with user:pass@host setting"""
"""
Get support for proxying and for urls with user:pass@host setting.
"""
def set_proxy (self, proxy):
"""parse given proxy information and store parsed values"""
"""
Parse given proxy information and store parsed values.
"""
self.proxy = proxy
self.proxyauth = None
if self.proxy:

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Handle telnet: links"""
"""
Handle telnet: links.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -25,7 +27,9 @@ import urlbase
class TelnetUrl (urlbase.UrlBase):
"""Url link with telnet scheme"""
"""
Url link with telnet scheme.
"""
def build_url (self):
super(TelnetUrl, self).build_url()

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
"""Base URL handler"""
"""
Base URL handler.
"""
# Copyright (C) 2000-2005 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@ -40,7 +42,9 @@ import linkcheck.HtmlParser.htmlsax
stderr = codecs.getwriter("iso8859-1")(sys.stderr, errors="ignore")
def internal_error ():
"""print internal error message to stderr"""
"""
Print internal error message to stderr.
"""
print >> stderr, os.linesep
print >> stderr, _("""********** Oops, I did it again. *************
@ -66,7 +70,9 @@ I can work with ;) .
def print_app_info ():
"""print system and application info to stderr"""
"""
Print system and application info to stderr.
"""
print >> stderr, _("System info:")
print >> stderr, linkcheck.configuration.App
print >> stderr, _("Python %s on %s") % (sys.version, sys.platform)
@ -83,21 +89,24 @@ def urljoin (parent, url, scheme):
class UrlBase (object):
"""An URL with additional information like validity etc."""
"""
An URL with additional information like validity etc.
"""
def __init__ (self, base_url, recursion_level, consumer,
parent_url = None, base_ref = None,
line = -1, column = -1, name = u""):
"""Initialize check data, and store given variables.
"""
Initialize check data, and store given variables.
@base_url - unquoted and possibly unnormed url
@recursion_level - on what check level lies the base url
@config - Configuration instance
@parent_url - quoted and normed url of parent or None
@base_ref - quoted and normed url of <base href=""> or None
@line - line number of url in parent content
@column - column number of url in parent content
@name - name of url or empty
@base_url - unquoted and possibly unnormed url
@recursion_level - on what check level lies the base url
@config - Configuration instance
@parent_url - quoted and normed url of parent or None
@base_ref - quoted and normed url of <base href=""> or None
@line - line number of url in parent content
@column - column number of url in parent content
@name - name of url or empty
"""
self.base_ref = base_ref
# note that self.base_url must not be modified
@ -157,36 +166,52 @@ class UrlBase (object):
self.aliases = []
def set_result (self, msg, valid=True):
"""set result string and validity"""
"""
Set result string and validity.
"""
self.result = msg
self.valid = valid
def is_parseable (self):
"""return True iff content of this url is parseable"""
"""
Return True iff content of this url is parseable.
"""
return False
def is_html (self):
"""return True iff content of this url is HTML formatted"""
"""
Return True iff content of this url is HTML formatted.
"""
return False
def is_http (self):
"""return True for http:// URLs"""
"""
Return True for http:// URLs.
"""
return False
def is_file (self):
"""return True for file:// URLs"""
"""
Return True for file:// URLs.
"""
return False
def add_warning (self, s):
"""add a warning string"""
"""
Add a warning string.
"""
self.warning.append(s)
def add_info (self, s):
"""add an info string"""
"""
Add an info string.
"""
self.info.append(s)
def copy_from_cache (self, cache_data):
"""fill attributes from cache data"""
"""
Fill attributes from cache data.
"""
self.result = cache_data["result"]
self.warning.extend(cache_data["warning"])
self.info.extend(cache_data["info"])
@ -196,7 +221,9 @@ class UrlBase (object):
self.cached = True
def get_cache_data (self):
"""return all data values that should be put in the cache"""
"""
Return all data values that should be put in the cache.
"""
return {"result": self.result,
"warning": self.warning,
"info": self.info,
@ -206,7 +233,9 @@ class UrlBase (object):
}
def set_cache_keys (self):
"""Set keys for URL checking and content recursion."""
"""
Set keys for URL checking and content recursion.
"""
# remove anchor from content cache key since we assume
# URLs with different anchors to have the same content
self.cache_content_key = urlparse.urlunsplit(self.urlparts[:4]+[u''])
@ -230,10 +259,11 @@ class UrlBase (object):
self.cache_url_key)
def check_syntax (self):
"""Called before self.check(), this function inspects the
url syntax. Success enables further checking, failure
immediately logs this url. Syntax checks must not
use any network resources.
"""
Called before self.check(), this function inspects the
url syntax. Success enables further checking, failure
immediately logs this url. Syntax checks must not
use any network resources.
"""
linkcheck.log.debug(linkcheck.LOG_CHECK, "checking syntax")
if not self.base_url:
@ -249,8 +279,9 @@ class UrlBase (object):
return True
def build_url (self):
"""Construct self.url and self.urlparts out of the given base
url information self.base_url, self.parent_url and self.base_ref.
"""
Construct self.url and self.urlparts out of the given base
url information self.base_url, self.parent_url and self.base_ref.
"""
# norm base url
base_url, is_idn = linkcheck.url.url_norm(self.base_url)
@ -292,7 +323,9 @@ class UrlBase (object):
self.port = int(self.port)
def check (self):
"""main check function for checking this URL"""
"""
Main check function for checking this URL.
"""
try:
self.local_check()
self.consumer.checked(self)
@ -313,7 +346,9 @@ class UrlBase (object):
internal_error()
def local_check (self):
"""local check function can be overridden in subclasses"""
"""
Local check function can be overridden in subclasses.
"""
linkcheck.log.debug(linkcheck.LOG_CHECK, "Checking %s", self)
if self.recursion_level and self.consumer.config['wait']:
linkcheck.log.debug(linkcheck.LOG_CHECK,
@ -375,7 +410,9 @@ class UrlBase (object):
self.close_connection()
def close_connection (self):
"""close an opened url connection"""
"""
Close an opened url connection.
"""
# brute force closing
if self.url_connection is not None:
try:
@ -387,13 +424,16 @@ class UrlBase (object):
self.url_connection = None
def check_connection (self):
"""The basic connection check uses urllib2.urlopen to initialize
a connection object.
"""
The basic connection check uses urllib2.urlopen to initialize
a connection object.
"""
self.url_connection = urllib2.urlopen(self.url)
def allows_recursion (self):
"""return True iff we can recurse into the url's content"""
"""
Return True iff we can recurse into the url's content.
"""
#linkcheck.log.debug(linkcheck.LOG_CHECK, "valid=%s, parseable=%s, "\
# "content=%s, extern=%s, robots=%s",
# self.valid, self.is_parseable(),
@ -409,8 +449,9 @@ class UrlBase (object):
not self.extern[0] and self.content_allows_robots()
def content_allows_robots (self):
"""return True if the content of this URL forbids robots to
search for recursive links.
"""
Return True if the content of this URL forbids robots to
search for recursive links.
"""
if not self.is_html():
return True
@ -491,11 +532,15 @@ class UrlBase (object):
return (1, 0)
def can_get_content (self):
"""indicate wether url get_content() can be called"""
"""
Indicate whether url get_content() can be called.
"""
return True
def get_content (self):
"""Precondition: url_connection is an opened URL."""
"""
Precondition: url_connection is an opened URL.
"""
if not self.has_content:
t = time.time()
self.data = self.url_connection.read()
@ -505,8 +550,9 @@ class UrlBase (object):
return self.data
def check_content (self, warningregex):
"""If a warning expression was given, call this function to check it
against the content of this url.
"""
If a warning expression was given, call this function to check it
against the content of this url.
"""
if not self.can_get_content():
return
@ -515,8 +561,10 @@ class UrlBase (object):
self.add_warning(_("Found %r in link contents.") % match.group())
def check_size (self):
"""if a maximum size was given, call this function to check it
against the content size of this url"""
"""
If a maximum size was given, call this function to check it
against the content size of this url.
"""
maxbytes = self.consumer.config["warnsizebytes"]
if maxbytes is not None and self.dlsize >= maxbytes:
self.add_warning(_("Content size %s is larger than %s.") % \
@ -524,16 +572,18 @@ class UrlBase (object):
linkcheck.strformat.strsize(maxbytes)))
def parse_url (self):
"""Parse url content and search for recursive links.
Default parse type is html.
"""
Parse url content and search for recursive links.
Default parse type is html.
"""
linkcheck.log.debug(linkcheck.LOG_CHECK,
"Parsing recursively into %s", self)
self.parse_html()
def get_user_password (self):
"""Get tuple (user, password) from configured authentication.
Both user and password can be None if not specified.
"""
Get tuple (user, password) from configured authentication.
Both user and password can be None if not specified.
"""
for auth in self.consumer.config["authentication"]:
if auth['pattern'].match(self.url):
@ -541,8 +591,9 @@ class UrlBase (object):
return None, None
def parse_html (self):
"""Parse into HTML content and search for URLs to check.
Found URLs are added to the URL queue.
"""
Parse into HTML content and search for URLs to check.
Found URLs are added to the URL queue.
"""
h = linkcheck.linkparse.LinkFinder(self.get_content())
p = linkcheck.HtmlParser.htmlsax.parser(h)
@ -565,7 +616,9 @@ class UrlBase (object):
self.consumer.append_url(url_data)
def parse_opera (self):
"""parse an opera bookmark file"""
"""
Parse an opera bookmark file.
"""
name = ""
lineno = 0
lines = self.get_content().splitlines()
@ -584,8 +637,9 @@ class UrlBase (object):
name = ""
def parse_text (self):
"""parse a text file with on url per line; comment and blank
lines are ignored
"""
Parse a text file with one url per line; comment and blank
lines are ignored.
"""
lineno = 0
for line in self.get_content().splitlines():
@ -599,7 +653,9 @@ class UrlBase (object):
self.consumer.append_url(url_data)
def parse_css (self):
"""parse a CSS file for url() patterns"""
"""
Parse a CSS file for url() patterns.
"""
lineno = 0
for line in self.get_content().splitlines():
lineno += 1
@ -612,7 +668,9 @@ class UrlBase (object):
self.consumer.append_url(url_data)
def serialized (self):
"""return serialized url check data as unicode string"""
"""
Return serialized url check data as unicode string.
"""
sep = unicode(os.linesep)
assert isinstance(self.base_url, unicode), self
if self.parent_url is not None: