use a decorator to synchronize all the checker threads

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2614 e7d03fd6-7b0d-0410-9947-9c21f3af8025
2026-04-23 23:54:44 +00:00 · 2005-05-19 15:49:06 +00:00 · 2005-05-19 15:49:06 +00:00 · 55d7d5f7df
commit 55d7d5f7df
parent 3f6d2fa753
13 changed files with 246 additions and 294 deletions
--- a/linkcheck/checker/init.py
+++ b/linkcheck/checker/init.py
@ -134,13 +134,14 @@ def _check_urls (consumer):
    start_time = time.time()
    status_time = start_time
    while not consumer.finished():
-        consumer.check_url()
-        if consumer.config['status']:
+        if not consumer.check_url():
+            time.sleep(0.1)
+        if consumer.config('status'):
            curtime = time.time()
            if (curtime - status_time) > 5:
                consumer.print_status(curtime, start_time)
                status_time = curtime
-    consumer.logger_end_output()
+    consumer.end_log_output()


 # file extensions we can parse recursively
@ -252,7 +253,7 @@ def get_url_from (base_url, recursion_level, consumer,
        pat = url_data.get_intern_pattern()
        linkcheck.log.debug(linkcheck.LOG_CMDLINE, "Pattern %r", pat)
        if pat:
-            consumer.config['internlinks'].append(linkcheck.get_link_pat(pat))
+            consumer.config_append('internlinks', linkcheck.get_link_pat(pat))
    return url_data


--- a/linkcheck/checker/cache.py
+++ b/linkcheck/checker/cache.py
@ -48,13 +48,11 @@ def _check_morsel (m, host, path):
    return m.output(header='').strip()


-class Cache (linkcheck.lock.AssertLock):
+class Cache (object):
    """
    Store and provide routines for cached data. Currently there are
    caches for cookies, checked URLs, FTP connections and robots.txt
    contents.
-
-    All public operations (except __init__()) are thread-safe.
    """

    def __init__ (self):
@ -63,10 +61,13 @@ class Cache (linkcheck.lock.AssertLock):
        """
        super(Cache, self).__init__()
        # already checked URLs
+        # {cache key (string) -> cache data (dict)}
        self.checked = {}
        # URLs that are being checked
+        # {cache key (string) -> urldata (UrlData)}
        self.in_progress = {}
        # to-be-checked URLs
+        # [urldata (UrlData)]
        self.incoming = collections.deque()
        # downloaded robots.txt files
        self.robots_txt = {}
@ -79,11 +80,7 @@ class Cache (linkcheck.lock.AssertLock):
        """
        Check if incoming queue is empty.
        """
-        self.acquire()
-        try:
-            return len(self.incoming) <= 0
-        finally:
-            self.release()
+        return len(self.incoming) <= 0

    def incoming_get_url (self):
        """
@ -91,57 +88,44 @@ class Cache (linkcheck.lock.AssertLock):
        return it. If no such url is available return None. The
        url might be already cached.
        """
-        self.acquire()
-        try:
-            for i, url_data in enumerate(self.incoming):
-                key = url_data.cache_url_key
-                if key in self.checked:
-                    del self.incoming[i]
-                    # url is cached and can be logged
-                    url_data.copy_from_cache(self.checked[key])
-                    return url_data
-                elif key not in self.in_progress:
-                    del self.incoming[i]
-                    self.in_progress[key] = url_data
-                    return url_data
-            return None
-        finally:
-            self.release()
+        for i, url_data in enumerate(self.incoming):
+            key = url_data.cache_url_key
+            if key in self.checked:
+                del self.incoming[i]
+                # url is cached and can be logged
+                url_data.copy_from_cache(self.checked[key])
+                return url_data
+            elif key not in self.in_progress:
+                del self.incoming[i]
+                self.in_progress[key] = url_data
+                return url_data
+        return None

    def incoming_len (self):
        """
        Return number of entries in incoming queue.
        """
-        self.acquire()
-        try:
-            return len(self.incoming)
-        finally:
-            self.release()
+        return len(self.incoming)

    def incoming_add (self, url_data):
        """
        Add a new URL to list of URLs to check.
        """
-        self.acquire()
-        try:
-            linkcheck.log.debug(linkcheck.LOG_CACHE,
-                                "Add url %s...", repr(url_data))
-            # check syntax
-            if not url_data.check_syntax():
-                # wrong syntax, do not check any further
-                return False
-            # check the cache
-            key = url_data.cache_url_key
-            if key in self.checked:
-                # url is cached and can be logged
-                url_data.copy_from_cache(self.checked[key])
-                return False
-            # url is not cached, so add to incoming queue
-            self.incoming.append(url_data)
-            linkcheck.log.debug(linkcheck.LOG_CACHE, "...added.")
-            return True
-        finally:
-            self.release()
+        linkcheck.log.debug(linkcheck.LOG_CACHE,
+                            "Add url %s...", repr(url_data))
+        if url_data.has_result:
+            # do not check any further
+            return False
+        # check the cache
+        key = url_data.cache_url_key
+        if key in self.checked:
+            # url is cached and can be logged
+            url_data.copy_from_cache(self.checked[key])
+            return False
+        # url is not cached, so add to incoming queue
+        self.incoming.append(url_data)
+        linkcheck.log.debug(linkcheck.LOG_CACHE, "...added.")
+        return True

    def has_incoming (self, key):
        """
@ -150,11 +134,7 @@ class Cache (linkcheck.lock.AssertLock):
        @param key: Usually obtained from url_data.cache_url_key
        @type key: String
        """
-        self.acquire()
-        try:
-            return key in self.incoming
-        finally:
-            self.release()
+        return key in self.incoming

    def has_in_progress (self, key):
        """
@ -163,44 +143,32 @@ class Cache (linkcheck.lock.AssertLock):
        @param key: Usually obtained from url_data.cache_url_key
        @type key: String
        """
-        self.acquire()
-        try:
-            return key in self.in_progress
-        finally:
-            self.release()
+        return key in self.in_progress

    def in_progress_remove (self, url_data, ignore_missing=False):
        """
        Remove url from in-progress cache. If url is not cached and
        ignore_missing evaluates False, raise AssertionError.
        """
-        self.acquire()
-        try:
-            key = url_data.cache_url_key
-            if key in self.in_progress:
-                del self.in_progress[key]
-            else:
-                assert ignore_missing, repr(key)
-        finally:
-            self.release()
+        key = url_data.cache_url_key
+        if key in self.in_progress:
+            del self.in_progress[key]
+        else:
+            assert ignore_missing, repr(key)

    def checked_add (self, url_data):
        """
        Cache checked url data.
        """
-        self.acquire()
-        try:
-            data = url_data.get_cache_data()
-            key = url_data.cache_url_key
-            linkcheck.log.debug(linkcheck.LOG_CACHE, "Cache key %r...", key)
-            assert key not in self.checked, \
-                   key + u", " + unicode(self.checked[key])
-            assert key in self.in_progress, key
-            # move entry from self.in_progress to self.checked
-            del self.in_progress[key]
-            self.checked[key] = data
-        finally:
-            self.release()
+        data = url_data.get_cache_data()
+        key = url_data.cache_url_key
+        linkcheck.log.debug(linkcheck.LOG_CACHE, "Cache key %r...", key)
+        assert key not in self.checked, \
+               key + u", " + unicode(self.checked[key])
+        assert key in self.in_progress, key
+        # move entry from self.in_progress to self.checked
+        del self.in_progress[key]
+        self.checked[key] = data

    def checked_redirect (self, redirect, url_data):
        """
@ -209,96 +177,69 @@ class Cache (linkcheck.lock.AssertLock):
        If the redirect URL is found in the cache, the result data is
        already copied.
        """
-        self.acquire()
-        try:
-            if redirect in self.checked:
-                url_data.copy_from_cache(self.checked[redirect])
-                return True
-            return False
-        finally:
-            self.release()
+        if redirect in self.checked:
+            url_data.copy_from_cache(self.checked[redirect])
+            return True
+        return False

    def robots_txt_allows_url (self, roboturl, url, user, password):
        """
        Ask robots.txt allowance.
        """
-        self.acquire()
-        try:
-            if roboturl not in self.robots_txt:
-                rp = linkcheck.robotparser2.RobotFileParser(
-                                                user=user, password=password)
-                rp.set_url(roboturl)
-                rp.read()
-                self.robots_txt[roboturl] = rp
-            else:
-                rp = self.robots_txt[roboturl]
-            return rp.can_fetch(linkcheck.configuration.UserAgent, url)
-        finally:
-            self.release()
+        if roboturl not in self.robots_txt:
+            rp = linkcheck.robotparser2.RobotFileParser(
+                                            user=user, password=password)
+            rp.set_url(roboturl)
+            rp.read()
+            self.robots_txt[roboturl] = rp
+        else:
+            rp = self.robots_txt[roboturl]
+        return rp.can_fetch(linkcheck.configuration.UserAgent, url)

    def get_connection (self, key):
        """
        Get open connection to given host. Return None if no such
        connection is available (or the old one timed out).
        """
-        self.acquire()
-        try:
-            return self.pool.get_connection(key)
-        finally:
-            self.release()
+        return self.pool.get_connection(key)

    def add_connection (self, key, connection, timeout):
        """
        Store open connection into pool for reuse.
        """
-        self.acquire()
-        try:
-            self.pool.add_connection(key, connection, timeout)
-        finally:
-            self.release()
+        self.pool.add_connection(key, connection, timeout)

    def release_connection (self, key):
        """
        Remove connection from pool.
        """
-        self.acquire()
-        try:
-            self.pool.release_connection(key)
-        finally:
-            self.release()
+        self.pool.release_connection(key)

    def store_cookies (self, headers, host):
        """
        Cookie cache setter function; performs no locking of its own.
        Can raise the exception Cookie.CookieError.
        """
-        self.acquire()
-        try:
-            output = []
-            for h in headers.getallmatchingheaders("Set-Cookie"):
-                output.append(h)
-                linkcheck.log.debug(linkcheck.LOG_CACHE, "Store cookie %s", h)
-                c = self.cookies.setdefault(host, Cookie.SimpleCookie())
-                c.load(h)
-            return output
-        finally:
-            self.release()
+        output = []
+        for h in headers.getallmatchingheaders("Set-Cookie"):
+            output.append(h)
+            linkcheck.log.debug(linkcheck.LOG_CACHE, "Store cookie %s", h)
+            c = self.cookies.setdefault(host, Cookie.SimpleCookie())
+            c.load(h)
+        return output

    def get_cookies (self, host, path):
        """
        Cookie cache getter function; performs no locking of its own.
        """
-        self.acquire()
-        try:
-            linkcheck.log.debug(linkcheck.LOG_CACHE,
-                                "Get cookies for host %r path %r", host, path)
-            if not self.cookies.has_key(host):
-                return []
-            cookievals = []
-            for m in self.cookies[host].values():
-                val = _check_morsel(m, host, path)
-                if val:
-                    cookievals.append(val)
-            return cookievals
-        finally:
-            self.release()
+        linkcheck.log.debug(linkcheck.LOG_CACHE,
+                            "Get cookies for host %r path %r", host, path)
+        if not self.cookies.has_key(host):
+            return []
+        cookievals = []
+        for m in self.cookies[host].values():
+            val = _check_morsel(m, host, path)
+            if val:
+                cookievals.append(val)
+        return cookievals
+
--- a/linkcheck/checker/consumer.py
+++ b/linkcheck/checker/consumer.py
@ -20,14 +20,22 @@ Url consumer class.

 import sys
 import time
+try:
+    import thread
+except ImportError:
+    import dummy_thread as thread

 import linkcheck.threader
 import linkcheck.log
 import linkcheck.lock
 import linkcheck.strformat
 import linkcheck.checker.geoip
+from linkcheck.decorators import synchronized
 from urlbase import stderr

+# global lock for synchronizing all the checker threads
+_lock = thread.allocate_lock()
+

 def print_tocheck (tocheck):
    msg = _n("%5d URL queued,", "%5d URLs queued,", tocheck) % tocheck
@ -49,7 +57,7 @@ def print_duration (duration):
    print >> stderr, msg,


-class Consumer (linkcheck.lock.AssertLock):
+class Consumer (object):
    """
    Consume URLs from the URL queue in a thread-safe manner.
    """
@ -59,33 +67,46 @@ class Consumer (linkcheck.lock.AssertLock):
        Initialize consumer data and threads.
        """
        super(Consumer, self).__init__()
-        self.config = config
-        self.cache = cache
-        self.threader = linkcheck.threader.Threader(num=config['threads'])
-        self.logger = config['logger']
-        self.fileoutput = config['fileoutput']
-        self.logger_start_output()
+        self._config = config
+        self._cache = cache
+        self._threader = linkcheck.threader.Threader(num=config['threads'])
+        self.start_log_output()

+    @synchronized(_lock)
+    def config (self, key):
+        return self._config[key]
+
+    @synchronized(_lock)
+    def config_append (self, key, val):
+        self._config[key].append(val)
+
+    @synchronized(_lock)
+    def __getattr__ (self, name):
+        if hasattr(self._cache, name):
+            return getattr(self._cache, name)
+        raise AttributeError(name)
+
+    @synchronized(_lock)
    def append_url (self, url_data):
        """
        Append url to incoming check list.
        """
-        if not self.cache.incoming_add(url_data):
+        if not self._cache.incoming_add(url_data):
            # can be logged
-            self.logger_log_url(url_data)
+            self._log_url(url_data)

+    @synchronized(_lock)
    def check_url (self):
        """
        Start new thread checking the given url.
        """
-        url_data = self.cache.incoming_get_url()
+        url_data = self._cache.incoming_get_url()
        if url_data is None:
-            # active connections are downloading/parsing, so
-            # wait a little
-            time.sleep(0.1)
+            # active connections are downloading/parsing
+            pass
        elif url_data.cached:
            # was cached -> can be logged
-            self.logger_log_url(url_data)
+            self._log_url(url_data)
        else:
            # go check this url
            # this calls either self.checked() or self.interrupted()
@ -95,47 +116,48 @@ class Consumer (linkcheck.lock.AssertLock):
            else:
                name = u""
            name += url_data.base_url
-            self.threader.start_thread(url_data.check, (), name=name)
+            self._threader.start_thread(url_data.check, (), name=name)
+        return url_data and not url_data.cached

+    @synchronized(_lock)
    def checked (self, url_data):
        """
        Put checked url in cache and log it.
        """
        # log before putting it in the cache (otherwise we would see
        # a "(cached)" after every url
-        self.logger_log_url(url_data)
+        self._log_url(url_data)
        if not url_data.cached:
-            self.cache.checked_add(url_data)
+            self._cache.checked_add(url_data)
        else:
-            self.cache.in_progress_remove(url_data)
+            self._cache.in_progress_remove(url_data)

+    @synchronized(_lock)
    def interrupted (self, url_data):
        """
        Remove url from active list.
        """
-        self.cache.in_progress_remove(url_data, ignore_missing=True)
+        self._cache.in_progress_remove(url_data, ignore_missing=True)

+    @synchronized(_lock)
    def finished (self):
        """
        Return True if checking is finished.
        """
        # no separate locking step: @synchronized(_lock) wraps the whole call
-        tocheck = self.cache.incoming_len()
-        self.acquire()
-        try:
-            return self.threader.finished() and tocheck == 0
-        finally:
-            self.release()
+        return self._threader.finished() and \
+               self._cache.incoming_len() == 0

+    @synchronized(_lock)
+    def finish (self):
+        self._threader.finish()
+
+    @synchronized(_lock)
    def no_more_threads (self):
        """
        Return True if no more active threads are running.
        """
-        self.acquire()
-        try:
-            return self.threader.finished()
-        finally:
-            self.release()
+        return self._threader.finished()

    def abort (self):
        """
@ -148,7 +170,7 @@ class Consumer (linkcheck.lock.AssertLock):
            if num_waited > wait_max:
                linkcheck.log.error(linkcheck.LOG_CHECK,
                                    "Thread wait timeout")
-                self.logger_end_output()
+                self.end_log_output()
                sys.exit(1)
            num = self.active_threads()
            msg = \
@ -156,94 +178,69 @@ class Consumer (linkcheck.lock.AssertLock):
               "keyboard interrupt; waiting for %d active threads to finish",
               num)
            linkcheck.log.warn(linkcheck.LOG_CHECK, msg, num)
-            self.acquire()
-            try:
-                self.threader.finish()
-            finally:
-                self.release()
+            self.finish()
            num_waited += 1
            time.sleep(2)
-        self.logger_end_output()
+        self.end_log_output()

+    @synchronized(_lock)
    def print_status (self, curtime, start_time):
        """
        Print check status looking at url queues.
        """
        # no separate locking step: @synchronized(_lock) wraps the whole call
-        tocheck = self.cache.incoming_len()
-        active = self.active_threads()
-        self.acquire()
-        try:
-            print >> stderr, _("Status:"),
-            print_active(active)
-            print_links(self.logger.number)
-            print_tocheck(tocheck)
-            print_duration(curtime - start_time)
-            print >> stderr
-        finally:
-            self.release()
+        print >> stderr, _("Status:"),
+        print_active(self._threader.active_threads())
+        print_links(self._config['logger'].number)
+        print_tocheck(self._cache.incoming_len())
+        print_duration(curtime - start_time)
+        print >> stderr

-    def logger_start_output (self):
+    @synchronized(_lock)
+    def start_log_output (self):
        """
        Start output of all configured loggers.
        """
-        self.acquire()
-        try:
-            self.logger.start_output()
-            for logger in self.fileoutput:
-                logger.start_output()
-        finally:
-            self.release()
+        self._config['logger'].start_output()
+        for logger in self._config['fileoutput']:
+            logger.start_output()

-    def logger_log_url (self, url_data):
+    def _log_url (self, url_data):
        """
        Send new url to all configured loggers.
        """
-        self.acquire()
-        try:
-            do_print = self.config["verbose"] or not url_data.valid or \
-                (url_data.warning and self.config["warnings"])
-            self.logger.log_filter_url(url_data, do_print)
-            for log in self.fileoutput:
-                log.log_filter_url(url_data, do_print)
-        finally:
-            self.release()
+        do_print = self._config["verbose"] or not url_data.valid or \
+            (url_data.warning and self._config["warnings"])
+        self._config['logger'].log_filter_url(url_data, do_print)
+        for log in self._config['fileoutput']:
+            log.log_filter_url(url_data, do_print)
        # do_filter = (self.linknumber % 1000) == 0
        # XXX deadlock!
        #if do_filter:
        #    self.filter_queue(self)

-    def logger_end_output (self):
+    @synchronized(_lock)
+    def end_log_output (self):
        """
        End output of all configured loggers.
        """
-        self.acquire()
-        try:
-            self.logger.end_output()
-            for logger in self.fileoutput:
-                logger.end_output()
-        finally:
-            self.release()
+        self._config['logger'].end_output()
+        for logger in self._config['fileoutput']:
+            logger.end_output()

+    @synchronized(_lock)
    def active_threads (self):
        """
        Return number of active threads.
        """
-        self.acquire()
-        try:
-            return self.threader.active_threads()
-        finally:
-            self.release()
+        return self._threader.active_threads()

+    @synchronized(_lock)
    def get_country_name (self, host):
        """
        Return country code for host if found, else None.
        """
-        self.acquire()
-        try:
-            gi = self.config["geoip"]
-            if gi:
-                return linkcheck.checker.geoip.get_country(gi, host)
-            return None
-        finally:
-            self.release()
+        gi = self._config["geoip"]
+        if gi:
+            return linkcheck.checker.geoip.get_country(gi, host)
+        return None
--- a/linkcheck/checker/ftpurl.py
+++ b/linkcheck/checker/ftpurl.py
@ -58,12 +58,12 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        order: login, changing directory, list the file.
        """
        # proxy support (we support only http)
-        self.set_proxy(self.consumer.config["proxy"].get(self.scheme))
+        self.set_proxy(self.consumer.config("proxy").get(self.scheme))
        if self.proxy:
            # using a (HTTP) proxy
            http = httpurl.HttpUrl(self.base_url,
                  self.recursion_level,
-                  self.consumer.config,
+                  self.consumer,
                  parent_url=self.parent_url,
                  base_ref=self.base_ref,
                  line=self.line,
@ -92,7 +92,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        # ready to connect
        _user, _password = self.get_user_password()
        key = ("ftp", self.urlparts[1], _user, _password)
-        conn = self.consumer.cache.get_connection(key)
+        conn = self.consumer.get_connection(key)
        if conn is not None and conn.sock is not None:
            # reuse cached FTP connection
            self.url_connection = conn
@ -250,6 +250,6 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        # add to cached connections
        _user, _password = self.get_user_password()
        key = ("ftp", self.urlparts[1], _user, _password)
-        cache_add = self.consumer.cache.add_connection
+        cache_add = self.consumer.add_connection
        cache_add(key, self.url_connection, DEFAULT_TIMEOUT_SECS)
        self.url_connection = None
--- a/linkcheck/checker/httpurl.py
+++ b/linkcheck/checker/httpurl.py
@ -78,8 +78,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        """
        roboturl = self.get_robots_txt_url()
        user, password = self.get_user_password()
-        return self.consumer.cache.robots_txt_allows_url(roboturl, url,
-                                                         user, password)
+        return self.consumer.robots_txt_allows_url(roboturl, url,
+                                                   user, password)

    def check_connection (self):
        """
@ -124,15 +124,17 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        | extension-code
        """
        # set the proxy, so a 407 status after this is an error
-        self.set_proxy(self.consumer.config["proxy"].get(self.scheme))
+        self.set_proxy(self.consumer.config("proxy").get(self.scheme))
        # initialize check data
        self.headers = None
        self.auth = None
        self.cookies = []
        # check robots.txt
        if not self.allows_robots(self.url):
-            self.add_info(
+            # remove all previously stored results
+            self.add_warning(
                       _("Access denied by robots.txt, checked only syntax."))
+            self.set_result(u"syntax OK")
            return
        # check for amazon server quirk
        if _is_amazon(self.urlparts[1]):
@ -144,12 +146,23 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
            self.method = "HEAD"
        # check the http connection
        response, fallback_GET = self.check_http_connection()
+        if self.headers and self.headers.has_key("Server"):
+            server = self.headers['Server']
+        else:
+            server = _("unknown")
+        if fallback_GET:
+            self.add_info(_("Server %r did not support HEAD request; "\
+                            "a GET request was used instead.") % server)
+        if self.no_anchor:
+            self.add_warning(_("Server %r had no anchor support, removed"\
+                               " anchor from request.") % server)
        # redirections might have changed the URL
        newurl = urlparse.urlunsplit(self.urlparts)
        if self.url != newurl:
            self.url = newurl
        # check response
-        self.check_response(response, fallback_GET)
+        if response:
+            self.check_response(response)

    def check_http_connection (self):
        """
@ -205,7 +218,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
                raise
            if tries == -1:
                linkcheck.log.debug(linkcheck.LOG_CHECK, "already handled")
-                return response, fallback_GET
+                return None, fallback_GET
            if tries >= self.max_redirects:
                if self.method == "HEAD":
                    # Microsoft servers tend to recurse HEAD requests
@ -276,11 +289,13 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
            if self.is_extern():
                self.add_info(
                          _("Outside of domain filter, checked only syntax."))
+                self.set_result(u"filtered")
                return -1, response
            # check robots.txt allowance again
            if not self.allows_robots(redirected):
                self.add_warning(
                       _("Access denied by robots.txt, checked only syntax."))
+                self.set_result(u"syntax OK")
                return -1, response
            # see about recursive redirect
            all_seen = [self.cache_url_key] + self.aliases
@ -330,7 +345,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
            tries += 1
        return tries, response

-    def check_response (self, response, fallback_GET):
+    def check_response (self, response):
        """
        Check final result and log it.
        """
@ -338,27 +353,17 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
            self.set_result(u"%r %s" % (response.status, response.reason),
                            valid=False)
        else:
-            if self.headers and self.headers.has_key("Server"):
-                server = self.headers['Server']
-            else:
-                server = _("unknown")
-            if fallback_GET:
-                self.add_info(_("Server %r did not support HEAD request; "\
-                                "a GET request was used instead.") % server)
-            if self.no_anchor:
-                self.add_warning(_("Server %r had no anchor support, removed"\
-                                   " anchor from request.") % server)
            if response.status == 204:
                # no content
                self.add_warning(
                            linkcheck.strformat.unicode_safe(response.reason))
            # store cookies for valid links
-            if self.consumer.config['cookies']:
+            if self.consumer.config('cookies'):
                for c in self.cookies:
                    self.add_info(_("Store cookie: %s.") % c)
                try:
-                    out = self.consumer.cache.store_cookies(self.headers,
-                                                            self.urlparts[1])
+                    out = self.consumer.store_cookies(self.headers,
+                                                      self.urlparts[1])
                    for h in out:
                        self.add_info(linkcheck.strformat.unicode_safe(h))
                except Cookie.CookieError, msg:
@ -414,9 +419,9 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
                                      linkcheck.configuration.UserAgent)
        self.url_connection.putheader("Accept-Encoding",
                                  "gzip;q=1.0, deflate;q=0.9, identity;q=0.5")
-        if self.consumer.config['cookies']:
-            self.cookies = self.consumer.cache.get_cookies(self.urlparts[1],
-                                                           self.urlparts[2])
+        if self.consumer.config('cookies'):
+            self.cookies = self.consumer.get_cookies(self.urlparts[1],
+                                                     self.urlparts[2])
            for c in self.cookies:
                self.url_connection.putheader("Cookie", c)
        self.url_connection.endheaders()
@ -439,7 +444,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        """
        _user, _password = self.get_user_password()
        key = (scheme, self.urlparts[1], _user, _password)
-        conn = self.consumer.cache.get_connection(key)
+        conn = self.consumer.get_connection(key)
        if conn is not None:
            linkcheck.log.debug(linkcheck.LOG_CHECK,
                                "reuse cached HTTP(S) connection %s", conn)
@ -566,7 +571,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        # add to cached connections
        _user, _password = self.get_user_password()
        key = ("http", self.urlparts[1], _user, _password)
-        cache_add = self.consumer.cache.add_connection
+        cache_add = self.consumer.add_connection
        # note: only cache the connection when it is persistent
        # and all pending content has been received
        if not self.persistent or not self.has_content or \
--- a/linkcheck/checker/nntpurl.py
+++ b/linkcheck/checker/nntpurl.py
@ -40,7 +40,7 @@ class NntpUrl (urlbase.UrlBase):
        Connect to NNTP server and try to request the URL article
        resource (if specified).
        """
-        nntpserver = self.host or self.consumer.config["nntpserver"]
+        nntpserver = self.host or self.consumer.config("nntpserver")
        if not nntpserver:
            self.add_warning(
                    _("No NNTP server was specified, skipping this URL."))
--- a/linkcheck/checker/proxysupport.py
+++ b/linkcheck/checker/proxysupport.py
@ -59,7 +59,7 @@ class ProxySupport (object):
        """
        Check if self.host is in the no-proxy-for ignore list.
        """
-        for ro in self.consumer.config["noproxyfor"]:
+        for ro in self.consumer.config("noproxyfor"):
            if ro.search(self.host):
                return True
        return False
--- a/linkcheck/checker/telneturl.py
+++ b/linkcheck/checker/telneturl.py
@ -59,7 +59,7 @@ class TelnetUrl (urlbase.UrlBase):
        label is "login: ", expected password label is "Password: ".
        """
        self.url_connection = telnetlib.Telnet()
-        if self.consumer.config.get("debug"):
+        if self.consumer.config("debug"):
            self.url_connection.set_debuglevel(1)
        self.url_connection.open(self.host, self.port)
        if self.user:
--- a/linkcheck/checker/urlbase.py
+++ b/linkcheck/checker/urlbase.py
@ -140,8 +140,9 @@ class UrlBase (object):
        self.urlparts = None
        # the anchor part of url
        self.anchor = None
-        # the result message string
+        # the result message string and flag
        self.result = u""
+        self.has_result = False
        # cached or not
        self.cached = False
        # valid or not
@ -166,11 +167,17 @@ class UrlBase (object):
        # cache keys, are set by build_url() calling set_cache_keys()
        self.cache_url_key = None
        self.cache_content_key = None
+        self.check_syntax()

    def set_result (self, msg, valid=True):
        """
        Set result string and validity.
        """
+        if self.has_result:
+            linkcheck.log.warn(linkcheck.LOG_CHECK,
+                  "Double result %r (previous %r)", msg, self.result)
+        else:
+            self.has_result = True
        self.result = msg
        self.valid = valid

@ -245,8 +252,8 @@ class UrlBase (object):
        linkcheck.log.debug(linkcheck.LOG_CACHE, "Content cache key %r",
                            self.cache_content_key)
        # construct cache key
-        if self.consumer.config["anchorcaching"] and \
-           self.consumer.config["anchors"]:
+        if self.consumer.config("anchorcaching") and \
+           self.consumer.config("anchors"):
            # do not ignore anchor
            parts = self.urlparts[:]
            parts[4] = self.anchor
@ -271,7 +278,7 @@ class UrlBase (object):
        linkcheck.log.debug(linkcheck.LOG_CHECK, "checking syntax")
        if not self.base_url:
            self.set_result(_("URL is empty"), valid=False)
-            return False
+            return
        try:
            self.build_url()
            # check url warnings
@ -282,10 +289,9 @@ class UrlBase (object):
        except linkcheck.LinkCheckerError, msg:
            self.set_result(linkcheck.strformat.unicode_safe(msg),
                            valid=False)
-            return False
+            return
        self.set_cache_keys()
        self.extern = self._get_extern(self.url)
-        return True

    def build_url (self):
        """
@ -338,7 +344,7 @@ class UrlBase (object):
        """
        Main check function for checking this URL.
        """
-        if self.consumer.config["trace"]:
+        if self.consumer.config("trace"):
            linkcheck.log.trace()
        try:
            self.local_check()
@ -372,11 +378,11 @@ class UrlBase (object):
        Local check function can be overridden in subclasses.
        """
        linkcheck.log.debug(linkcheck.LOG_CHECK, "Checking %s", self)
-        if self.recursion_level and self.consumer.config['wait']:
+        if self.recursion_level and self.consumer.config('wait'):
            linkcheck.log.debug(linkcheck.LOG_CHECK,
                                "sleeping for %d seconds",
-                                self.consumer.config['wait'])
-            time.sleep(self.consumer.config['wait'])
+                                self.consumer.config('wait'))
+            time.sleep(self.consumer.config('wait'))
        t = time.time()
        if self.is_extern():
            self.add_info(_("Outside of domain filter, checked only syntax."))
@ -387,7 +393,7 @@ class UrlBase (object):
        try:
            self.check_connection()
            self.add_country_info()
-            if self.consumer.config["anchors"]:
+            if self.consumer.config("anchors"):
                self.check_anchors()
        except tuple(linkcheck.checker.ExcList):
            etype, evalue, etb = sys.exc_info()
@ -403,7 +409,7 @@ class UrlBase (object):
                            valid=False)

        # check content
-        warningregex = self.consumer.config["warningregex"]
+        warningregex = self.consumer.config("warningregex")
        if warningregex and self.valid:
            linkcheck.log.debug(linkcheck.LOG_CHECK, "checking content")
            try:
@ -469,8 +475,8 @@ class UrlBase (object):
        return self.valid and \
            self.is_parseable() and \
            self.can_get_content() and \
-            (self.consumer.config["recursionlevel"] < 0 or
-            self.recursion_level < self.consumer.config["recursionlevel"]) and \
+            (self.consumer.config("recursionlevel") < 0 or
+            self.recursion_level < self.consumer.config("recursionlevel")) and \
            not self.extern[0] and self.content_allows_robots()

    def content_allows_robots (self):
@ -533,13 +539,13 @@ class UrlBase (object):
        @return: a tuple (is_extern, is_strict)
        @rtype: tuple (bool, bool)
        """
-        for entry in self.consumer.config["externlinks"]:
+        for entry in self.consumer.config("externlinks"):
            match = entry['pattern'].search(url)
            if (entry['negate'] and not match) or \
               (match and not entry['negate']):
                linkcheck.log.debug(linkcheck.LOG_CHECK, "Extern URL %r", url)
                return (1, entry['strict'])
-        for entry in self.consumer.config["internlinks"]:
+        for entry in self.consumer.config("internlinks"):
            match = entry['pattern'].search(url)
            if (entry['negate'] and not match) or \
               (match and not entry['negate']):
@ -582,7 +588,7 @@ class UrlBase (object):
        If a maximum size was given, call this function to check it
        against the content size of this url.
        """
-        maxbytes = self.consumer.config["warnsizebytes"]
+        maxbytes = self.consumer.config("warnsizebytes")
        if maxbytes is not None and self.dlsize >= maxbytes:
            self.add_warning(_("Content size %s is larger than %s.") % \
                         (linkcheck.strformat.strsize(self.dlsize),
@ -602,7 +608,7 @@ class UrlBase (object):
        Get tuple (user, password) from configured authentication.
        Both user and password can be None if not specified.
        """
-        for auth in self.consumer.config["authentication"]:
+        for auth in self.consumer.config("authentication"):
            if auth['pattern'].match(self.url):
                return auth['user'], auth['password']
        return None, None
@ -728,7 +734,7 @@ class UrlBase (object):
        @rtype: string
        """
        s = self.serialized()
-        return self.consumer.config['logger'].encode(s)
+        return self.consumer.config('logger').encode(s)

    def __repr__ (self):
        """
--- a/linkcheck/decorators.py
+++ b/linkcheck/decorators.py
@ -4,6 +4,7 @@ Simple decorators (usable in Python >= 2.4).
 import warnings
 import signal
 import os
+import thread

 def deprecated (func):
    """
--- a/linkcheck/ftests/init.py
+++ b/linkcheck/ftests/init.py
@ -163,9 +163,9 @@ class StandardTest (unittest.TestCase):
                                      url, 0, consumer, cmdline=cmdline)
        consumer.append_url(url_data)
        linkcheck.checker.check_urls(consumer)
-        if consumer.config['logger'].diff:
+        if consumer.config('logger').diff:
            sep = unicode(os.linesep)
-            l = [url] + consumer.config['logger'].diff
+            l = [url] + consumer.config('logger').diff
            l = sep.join(l)
            self.fail(l.encode("iso8859-1", "ignore"))

@ -187,8 +187,8 @@ class StandardTest (unittest.TestCase):
                                          url, 0, consumer, cmdline=cmdline)
        consumer.append_url(url_data)
        linkcheck.checker.check_urls(consumer)
-        if consumer.config['logger'].diff:
+        if consumer.config('logger').diff:
            sep = unicode(os.linesep)
-            l = [url] + consumer.config['logger'].diff
+            l = [url] + consumer.config('logger').diff
            l = sep.join(l)
            self.fail(l.encode("iso8859-1", "ignore"))
--- a/linkcheck/i18n.py
+++ b/linkcheck/i18n.py
@ -145,9 +145,9 @@ def get_locale ():
    loc = None
    try:
        loc = locale.getdefaultlocale()[0]
-    except ValueError:
-        # workaround (XXX delete this when python2.5 is fixed)
-        pass
+    except ValueError, msg:
+        # workaround for XXX
+        print >>sys.stderr, "WARNING", msg
    if loc is None:
        return 'C'
    loc = locale.normalize(loc)
--- a/linkcheck/log.py
+++ b/linkcheck/log.py
@ -29,6 +29,7 @@ import cStringIO as StringIO
 import linecache
 import sys
 import re
+import time
 try:
    import thread as _thread
 except ImportError:
@ -71,8 +72,8 @@ def _traceit (frame, event, arg):
        if filename.endswith(".pyc") or filename.endswith(".pyo"):
            filename = filename[:-1]
        line = linecache.getline(filename, lineno)
-        print "THREAD(%d) %s:%d: %s" % \
-                     (_thread.get_ident(), name, lineno, line.rstrip())
+        print "THREAD(%d) %.2f %s:%d: %s" % \
+               (_thread.get_ident(), time.time(), name, lineno, line.rstrip())
    return _traceit

 def trace ():