diff --git a/cgi-bin/lc.fcgi b/cgi-bin/lc.fcgi
index 0b7fff4f..432b3da6 100755
--- a/cgi-bin/lc.fcgi
+++ b/cgi-bin/lc.fcgi
@@ -34,7 +34,7 @@ try:
                   form=req.getFieldStorage(), env=req.env)
     req.Finish()
-except:
+except Exception:
     import traceback
     traceback.print_exc(file = open('traceback', 'a'))
diff --git a/doc/rest2htmlnav b/doc/rest2htmlnav
index 2ae207ad..1cc577be 100755
--- a/doc/rest2htmlnav
+++ b/doc/rest2htmlnav
@@ -11,7 +11,7 @@ Produces custom HTML and machine-parseable navigation info
 try:
     import locale
     locale.setlocale(locale.LC_ALL, '')
-except:
+except Exception:
     pass
 from docutils.core import publish_cmdline, default_description
diff --git a/install-linkchecker.py b/install-linkchecker.py
index cec6f6f7..d4583b7f 100644
--- a/install-linkchecker.py
+++ b/install-linkchecker.py
@@ -15,15 +15,16 @@
+from __future__ import with_statement
 import sys
 if not sys.platform.startswith('win'):
     # not for us
     sys.exit()
-if not hasattr(sys, "version_info"):
-    raise SystemExit, "This program requires Python 2.4 or later."
-if sys.version_info < (2, 4, 0, 'final', 0):
-    raise SystemExit, "This program requires Python 2.4 or later."
+if not hasattr(sys, 'version_info') or \
+   sys.version_info < (2, 5, 0, 'final', 0):
+    raise SystemExit("This program requires Python 2.5 or later.")
 import os
 import re
 import platform
 # releases supporting our special .bat files
+# XXX what is platform.release() on Vista?
 win_bat_releases = ['NT', 'XP', '2000', '2003Server']
 # path retrieving functions
@@ -92,20 +93,17 @@ def create_shortcuts ():
 def fix_configdata ():
-    """
-    Fix install and config paths in the config file.
- """ + """Fix install and config paths in the config file.""" name = "_linkchecker_configdata.py" conffile = os.path.join(sys.prefix, "Lib", "site-packages", name) lines = [] for line in file(conffile): - if line.startswith("install_") or line.startswith("config_"): + if line.startswith(("install_", "config_")): lines.append(fix_install_path(line)) else: lines.append(line) - f = file(conffile, "w") - f.write("".join(lines)) - f.close() + with file(conffile, "w") as f: + f.write("".join(lines)) # Windows install path scheme for python >= 2.3. # Snatched from PC/bdist_wininst/install.c. @@ -122,10 +120,8 @@ win_path_scheme = { } def fix_install_path (line): - """ - Replace placeholders written by bdist_wininst with those specified - in windows install path scheme. - """ + """Replace placeholders written by bdist_wininst with those specified + in windows install path scheme.""" key, eq, val = line.split() # unescape string (do not use eval()) val = val[1:-1].replace("\\\\", "\\") diff --git a/linkcheck/HtmlParser/htmllib.py b/linkcheck/HtmlParser/htmllib.py index f4ed4866..2844e059 100644 --- a/linkcheck/HtmlParser/htmllib.py +++ b/linkcheck/HtmlParser/htmllib.py @@ -125,7 +125,7 @@ class HtmlPrettyPrinter (object): """ tag = tag.encode(self.encoding, "ignore") self.fd.write("<%s" % tag.replace("/", "")) - for key, val in attrs.iteritems(): + for key, val in attrs.items(): key = key.encode(self.encoding, "ignore") if val is None: self.fd.write(" %s" % key) diff --git a/linkcheck/HtmlParser/htmlsax.h b/linkcheck/HtmlParser/htmlsax.h index db69162c..9c4812d0 100644 --- a/linkcheck/HtmlParser/htmlsax.h +++ b/linkcheck/HtmlParser/htmlsax.h @@ -22,20 +22,13 @@ #include "Python.h" -/* require Python >= 2.4 */ +/* require Python >= 2.5 */ #ifndef PY_VERSION_HEX -#error please install Python >= 2.4 +#error please install Python >= 2.5 #endif -#if PY_VERSION_HEX < 0x02040000 -#error please install Python >= 2.4 -#endif - -/* See 
http://www.python.org/dev/peps/pep-0353/#conversion-guidelines */
-#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
-typedef int Py_ssize_t;
-#define PY_SSIZE_T_MAX INT_MAX
-#define PY_SSIZE_T_MIN INT_MIN
+#if PY_VERSION_HEX < 0x02050000
+#error please install Python >= 2.5
 #endif
 /* user_data type for SAX calls */
diff --git a/linkcheck/HtmlParser/s_util.c b/linkcheck/HtmlParser/s_util.c
index 19c0db1d..7611d9a7 100644
--- a/linkcheck/HtmlParser/s_util.c
+++ b/linkcheck/HtmlParser/s_util.c
@@ -5,29 +5,6 @@
 */
 #include "Python.h"
-#ifndef Py_MEMCPY
-/* Py_MEMCPY can be used instead of memcpy in cases where the copied blocks
- * are often very short. While most platforms have highly optimized code for
- * large transfers, the setup costs for memcpy are often quite high. MEMCPY
- * solves this by doing short copies "in line".
- */
-#if defined(_MSC_VER)
-#define Py_MEMCPY(target, source, length) do { \
-    size_t i_, n_ = (length); \
-    char *t_ = (void*) (target); \
-    const char *s_ = (void*) (source); \
-    if (n_ >= 16) \
-        memcpy(t_, s_, n_); \
-    else \
-        for (i_ = 0; i_ < n_; i_++) \
-            t_[i_] = s_[i_]; \
-    } while (0)
-#else
-#define Py_MEMCPY memcpy
-#endif
-#endif
-
 #if !defined(HAVE_STRLCPY)
 /**
  * strlcpy - Copy a %NUL terminated string into a sized buffer
diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py
index a3d27c2c..4f44451d 100644
--- a/linkcheck/__init__.py
+++ b/linkcheck/__init__.py
@@ -20,13 +20,13 @@ Main function module for link checking.
 # imports and checks
 import sys
-if not hasattr(sys, 'version_info') or \
-    sys.version_info < (2, 4, 0, 'final', 0):
-    raise SystemExit("This program requires Python 2.4 or later.")
+if not hasattr(sys, 'version_info') or \
+    sys.version_info < (2, 5, 0, 'final', 0):
+    raise SystemExit("This program requires Python 2.5 or later.")
 import os
 import re
-import i18n
+from . 
import i18n import _linkchecker_configdata as configdata # application log areas @@ -45,33 +45,27 @@ lognames = { "dns": LOG_DNS, "thread": LOG_THREAD, "all": LOG, - } -lognamelist = ", ".join(["%r"%name for name in lognames.iterkeys()]) +} +lognamelist = ", ".join(repr(name) for name in lognames) -import log +from . import log class LinkCheckerError (StandardError): - """ - Exception to be raised on linkchecker-specific check errors. - """ + """Exception to be raised on linkchecker-specific check errors.""" pass def add_intern_pattern (url_data, config): - """ - Add intern URL regex to config. - """ + """Add intern URL regex to config.""" pat = url_data.get_intern_pattern() if pat: - assert None == log.debug(LOG_CHECK, - "Add intern pattern %r", pat) + log.debug(LOG_CHECK, "Add intern pattern %r", pat) config['internlinks'].append(get_link_pat(pat)) def get_link_pat (arg, strict=False): - """ - Get a link pattern matcher for intern/extern links. + """Get a link pattern matcher for intern/extern links. Returns a compiled pattern and a negate and strict option. 
@param arg: pattern from config @@ -81,7 +75,7 @@ def get_link_pat (arg, strict=False): @return: dictionary with keys 'pattern', 'negate' and 'strict' @rtype: dict """ - assert None == log.debug(LOG_CHECK, "Link pattern %r", arg) + log.debug(LOG_CHECK, "Link pattern %r", arg) if arg.startswith('!'): pattern = arg[1:] negate = True @@ -96,38 +90,37 @@ def get_link_pat (arg, strict=False): # note: don't confuse URL loggers with application logs above -import logger.text -import logger.html -import logger.gml -import logger.dot -import logger.sql -import logger.csvlog -import logger.blacklist -import logger.gxml -import logger.customxml -import logger.none +from .logger.text import TextLogger +from .logger.html import HtmlLogger +from .logger.gml import GMLLogger +from .logger.dot import DOTLogger +from .logger.sql import SQLLogger +from .logger.csvlog import CSVLogger +from .logger.blacklist import BlacklistLogger +from .logger.gxml import GraphXMLLogger +from .logger.customxml import CustomXMLLogger +from .logger.none import NoneLogger # default link logger classes Loggers = { - "text": logger.text.TextLogger, - "html": logger.html.HtmlLogger, - "gml": logger.gml.GMLLogger, - "dot": logger.dot.DOTLogger, - "sql": logger.sql.SQLLogger, - "csv": logger.csvlog.CSVLogger, - "blacklist": logger.blacklist.BlacklistLogger, - "gxml": logger.gxml.GraphXMLLogger, - "xml": logger.customxml.CustomXMLLogger, - "none": logger.none.NoneLogger, + "text": TextLogger, + "html": HtmlLogger, + "gml": GMLLogger, + "dot": DOTLogger, + "sql": SQLLogger, + "csv": CSVLogger, + "blacklist": BlacklistLogger, + "gxml": GraphXMLLogger, + "xml": CustomXMLLogger, + "none": NoneLogger, } # for easy printing: a comma separated logger list -LoggerKeys = ", ".join(["%r" % name for name in Loggers.iterkeys()]) +LoggerKeys = ", ".join(repr(name) for name in Loggers) def init_i18n (): - """ - Initialize i18n with the configured locale dir. 
The environment + """Initialize i18n with the configured locale dir. The environment variable LOCPATH can also specify a locale dir. @return: None diff --git a/linkcheck/ansicolor.py b/linkcheck/ansicolor.py index ff5c18c4..4703607c 100644 --- a/linkcheck/ansicolor.py +++ b/linkcheck/ansicolor.py @@ -162,9 +162,7 @@ AnsiReset = esc_ansicolor(default) def has_colors (fp): - """ - Test if given file is an ANSI color enabled tty. - """ + """Test if given file is an ANSI color enabled tty.""" # The isatty() function ensures that we do not colorize # redirected streams, as this is almost never what we want if not (hasattr(fp, "isatty") and fp.isatty()): @@ -188,9 +186,7 @@ def has_colors (fp): def _write_color_nt (fp, text, color): - """ - Assumes WConio has been imported at module level. - """ + """Assumes WConio has been imported at module level.""" oldcolor = WConio.gettextinfo()[4] oldtextcolor = oldcolor & 0x000F if ";" in color: @@ -201,9 +197,7 @@ def _write_color_nt (fp, text, color): def _write_color_ansi (fp, text, color): - """ - Colorize text with given color. - """ + """Colorize text with given color.""" fp.write('%s%s%s' % (esc_ansicolor(color), text, AnsiReset)) @@ -234,14 +228,10 @@ else: class Colorizer (object): - """ - Prints colored messages to streams. - """ + """Prints colored messages to streams.""" def __init__ (self, fp): - """ - Initialize with given stream (file-like object). - """ + """Initialize with given stream (file-like object).""" super(Colorizer, self).__init__() self.fp = fp if has_colors(fp): @@ -250,24 +240,18 @@ class Colorizer (object): self.write = self._write def _write (self, text, color=None): - """ - Print text as-is. - """ + """Print text as-is.""" self.fp.write(text) def _write_color (self, text, color=None): - """ - Print text with given color. If color is None, print text as-is. - """ + """Print text with given color. 
If color is None, print text as-is.""" if color is None: self.fp.write(text) else: write_color(self.fp, text, color) def __getattr__ (self, name): - """ - Delegate attribute access to the stored stream object. - """ + """Delegate attribute access to the stored stream object.""" return getattr(self.fp, name) @@ -275,8 +259,7 @@ class ColoredStreamHandler (logging.StreamHandler, object): """Send colored log messages to streams (file-like objects).""" def __init__ (self, strm=None): - """ - Log to given stream (a file-like object) or to stderr if + """Log to given stream (a file-like object) or to stderr if strm is None. """ super(ColoredStreamHandler, self).__init__(strm=strm) @@ -290,14 +273,12 @@ class ColoredStreamHandler (logging.StreamHandler, object): } def get_color (self, record): - """ - Get appropriate color according to log level. + """Get appropriate color according to log level. """ return self.colors.get(record.levelno, 'default') def emit (self, record): - """ - Emit a record. + """Emit a record. If a formatter is specified, it is used to format the record. The record is then written to the stream with a trailing newline diff --git a/linkcheck/cache/connection.py b/linkcheck/cache/connection.py index 0728544a..1f130c0e 100644 --- a/linkcheck/cache/connection.py +++ b/linkcheck/cache/connection.py @@ -20,7 +20,7 @@ Store and retrieve open connections. import time import linkcheck.lock -import linkcheck.log +from .. 
import log, LOG_CACHE from linkcheck.decorators import synchronized _lock = linkcheck.lock.get_lock("connection") @@ -74,7 +74,7 @@ class ConnectionPool (object): due_time = self.times[host] if due_time > t: wait = due_time - t - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, + log.debug(LOG_CACHE, "waiting for %.01f seconds on connection to %s", wait, host) time.sleep(wait) t = time.time() @@ -119,7 +119,7 @@ class ConnectionPool (object): """Remove expired connections from this pool.""" t = time.time() to_delete = [] - for key, conn_data in self.connections.iteritems(): + for key, conn_data in self.connections.items(): if conn_data[1] == 'available' and t > conn_data[2]: to_delete.append(key) for key in to_delete: @@ -132,7 +132,7 @@ class ConnectionPool (object): del self.connections[key] try: conn_data[1].close() - except: + except Exception: # ignore close errors pass diff --git a/linkcheck/cache/cookie.py b/linkcheck/cache/cookie.py index 78c6232a..b69d35bc 100644 --- a/linkcheck/cache/cookie.py +++ b/linkcheck/cache/cookie.py @@ -18,7 +18,7 @@ Store and retrieve cookies. """ from linkcheck.decorators import synchronized -import linkcheck.log +from .. 
import log, LOG_CACHE import linkcheck.lock import linkcheck.cookies @@ -46,7 +46,7 @@ class CookieJar (object): c = linkcheck.cookies.NetscapeCookie(h, scheme, host, path) jar.add(c) except linkcheck.cookies.CookieError: - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, + log.debug(LOG_CACHE, "Invalid cookie header for %s:%s%s: %r", scheme, host, path, h) for h in headers.getallmatchingheaders("Set-Cookie2"): # RFC 2965 cookie type @@ -54,7 +54,7 @@ class CookieJar (object): c = linkcheck.cookies.Rfc2965Cookie(h, scheme, host, path) jar.add(c) except linkcheck.cookies.CookieError: - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, + log.debug(LOG_CACHE, "Invalid cookie2 header for %s:%s%s: %r", scheme, host, path, h) self.cache[host] = jar return jar @@ -64,8 +64,7 @@ class CookieJar (object): """ Cookie cache getter function. """ - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, - "Get cookies for host %r path %r", host, path) + log.debug(LOG_CACHE, "Get cookies for host %r path %r", host, path) jar = self.cache.setdefault(host, set()) return [x for x in jar if x.check_expired() and \ x.is_valid_for(scheme, host, port, path)] diff --git a/linkcheck/cache/urlqueue.py b/linkcheck/cache/urlqueue.py index 180e63d7..7f09dc3e 100644 --- a/linkcheck/cache/urlqueue.py +++ b/linkcheck/cache/urlqueue.py @@ -17,10 +17,11 @@ """ Handle a queue of URLs to check. """ +from __future__ import with_statement import threading import collections from time import time as _time -import linkcheck.log +from .. import log, LOG_CACHE class Timeout (StandardError): @@ -79,11 +80,8 @@ class UrlQueue (object): return it. If no such url is available return None. The url might be already cached. 
""" - self.not_empty.acquire() - try: + with self.not_empty: return self._get(timeout) - finally: - self.not_empty.release() def _get (self, timeout): if timeout is None: @@ -126,20 +124,16 @@ class UrlQueue (object): is immediately available, else raise the Full exception ('timeout' is ignored in that case). """ - self.mutex.acquire() - try: + with self.mutex: self._put(item) self.not_empty.notify() - finally: - self.mutex.release() def _put (self, url_data): """Put URL in queue, increase number of unfished tasks.""" if self.shutdown: # don't accept more URLs return - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, - "queueing %s", url_data) + log.debug(LOG_CACHE, "queueing %s", url_data) key = url_data.cache_url_key if key in self.checked: # Put at beginning of queue to get consumed quickly. @@ -171,10 +165,8 @@ class UrlQueue (object): Raises a ValueError if called more times than there were items placed in the queue. """ - self.all_tasks_done.acquire() - try: - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, - "task_done %s", url_data) + with self.all_tasks_done: + log.debug(LOG_CACHE, "task_done %s", url_data) if url_data is not None: key = url_data.cache_url_key if key is not None and key not in self.checked: @@ -188,13 +180,10 @@ class UrlQueue (object): raise ValueError('task_done() called too many times') self.all_tasks_done.notifyAll() self.unfinished_tasks = unfinished - finally: - self.all_tasks_done.release() def _cache_url (self, key, url_data): """Put URL result data into cache.""" - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, - "Caching %r", key) + log.debug(LOG_CACHE, "Caching %r", key) assert key in self.in_progress, \ "%r not in %s" % (key, self.in_progress) del self.in_progress[key] @@ -206,8 +195,7 @@ class UrlQueue (object): for key in url_data.aliases: if key in self.checked or key in self.in_progress: continue - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, - "Caching alias %r", key) + 
log.debug(LOG_CACHE, "Caching alias %r", key) self.checked[key] = data def _sort (self): @@ -237,8 +225,7 @@ class UrlQueue (object): When the count of unfinished tasks drops to zero, join() unblocks. """ - self.all_tasks_done.acquire() - try: + with self.all_tasks_done: if timeout is None: while self.unfinished_tasks: self.all_tasks_done.wait() @@ -251,13 +238,10 @@ class UrlQueue (object): if remaining <= 0.0: raise Timeout() self.all_tasks_done.wait(remaining) - finally: - self.all_tasks_done.release() def do_shutdown (self): """Shutdown the queue by not accepting any more URLs.""" - self.mutex.acquire() - try: + with self.mutex: unfinished = self.unfinished_tasks - len(self.queue) self.queue.clear() if unfinished <= 0: @@ -266,19 +250,14 @@ class UrlQueue (object): self.all_tasks_done.notifyAll() self.unfinished_tasks = unfinished self.shutdown = True - finally: - self.mutex.release() def status (self): """ Get tuple (finished tasks, in progress, queue size). """ - self.mutex.acquire() - try: + with self.mutex: return (self.finished_tasks, len(self.in_progress), len(self.queue)) - finally: - self.mutex.release() def checked_redirect (self, redirect, url_data): """ @@ -287,11 +266,8 @@ class UrlQueue (object): If the redirect URL is found in the cache, the result data is already copied. 
""" - self.mutex.acquire() - try: + with self.mutex: if redirect in self.checked: url_data.copy_from_cache(self.checked[redirect]) return True return False - finally: - self.mutex.release() diff --git a/linkcheck/checker/__init__.py b/linkcheck/checker/__init__.py index cf37a9fa..c341e618 100644 --- a/linkcheck/checker/__init__.py +++ b/linkcheck/checker/__init__.py @@ -113,9 +113,7 @@ def get_urlclass_from (url): klass = linkcheck.checker.gopherurl.GopherUrl elif url.startswith("https:"): klass = linkcheck.checker.httpsurl.HttpsUrl - elif url.startswith("nntp:") or \ - url.startswith("news:") or \ - url.startswith("snews:"): + elif url.startswith(("nntp:", "news:", "snews:")): klass = linkcheck.checker.nntpurl.NntpUrl elif linkcheck.checker.unknownurl.is_unknown_url(url): # unknown url diff --git a/linkcheck/checker/fileurl.py b/linkcheck/checker/fileurl.py index 01ea62c0..443942d6 100644 --- a/linkcheck/checker/fileurl.py +++ b/linkcheck/checker/fileurl.py @@ -26,7 +26,7 @@ import urllib import urllib2 import urlbase -import linkcheck.log +from .. 
import log, LOG_CHECK import linkcheck.checker import linkcheck.fileutil from const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH, \ @@ -68,8 +68,7 @@ def get_nt_filename (path): for fname in os.listdir(head): if fname.lower() == tail.lower(): return os.path.join(get_nt_filename(head), fname) - linkcheck.log.error(linkcheck.LOG_CHECK, "could not find %r in %r", - tail, head) + log.error(LOG_CHECK, "could not find %r in %r", tail, head) return path @@ -234,12 +233,12 @@ class FileUrl (urlbase.UrlBase): if self.is_directory(): return True # guess by extension - for ro in PARSE_EXTENSIONS.itervalues(): + for ro in PARSE_EXTENSIONS.values(): if ro.search(self.url): return True # try to read content (can fail, so catch error) try: - for ro in PARSE_CONTENTS.itervalues(): + for ro in PARSE_CONTENTS.values(): if ro.search(self.get_content()[:30]): return True except IOError: @@ -253,11 +252,11 @@ class FileUrl (urlbase.UrlBase): if self.is_directory(): self.parse_html() return - for key, ro in PARSE_EXTENSIONS.iteritems(): + for key, ro in PARSE_EXTENSIONS.items(): if ro.search(self.url): getattr(self, "parse_"+key)() return - for key, ro in PARSE_CONTENTS.iteritems(): + for key, ro in PARSE_CONTENTS.items(): if ro.search(self.get_content()[:30]): getattr(self, "parse_"+key)() return diff --git a/linkcheck/checker/ftpurl.py b/linkcheck/checker/ftpurl.py index 6bff9e0f..915a45a3 100644 --- a/linkcheck/checker/ftpurl.py +++ b/linkcheck/checker/ftpurl.py @@ -23,6 +23,7 @@ import time import urllib import cStringIO as StringIO +from .. 
import log, LOG_CHECK import linkcheck import proxysupport import httpurl @@ -97,7 +98,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): self.aggregate.connections.wait_for_host(host) try: self.url_connection = ftplib.FTP() - if linkcheck.log.is_debug(linkcheck.LOG_CHECK): + if log.is_debug(LOG_CHECK): self.url_connection.set_debuglevel(1) self.url_connection.connect(host) if _user is None: @@ -136,8 +137,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): if not self.filename: return files = self.get_files() - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "FTP files %s", str(files)) + log.debug(LOG_CHECK, "FTP files %s", str(files)) if self.filename in files: # file found return @@ -161,8 +161,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): """ Parse list line and add the entry it points to to the file list. """ - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Directory entry %r", line) + log.debug(LOG_CHECK, "Directory entry %r", line) try: fpo = ftpparse.parse(line) name = fpo.name @@ -171,8 +170,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): if fpo.trycwd or fpo.tryretr: files.append(name) except (ValueError, AttributeError), msg: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "%s (%s)", str(msg), line) + log.debug(LOG_CHECK, "%s (%s)", str(msg), line) self.url_connection.dir(add_entry) return files @@ -194,7 +192,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): """ if self.is_directory(): return True - for ro in PARSE_EXTENSIONS.itervalues(): + for ro in PARSE_EXTENSIONS.values(): if ro.search(self.url): return True return False @@ -212,7 +210,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): if self.is_directory(): self.parse_html() return - for key, ro in PARSE_EXTENSIONS.iteritems(): + for key, ro in PARSE_EXTENSIONS.items(): if ro.search(self.url): 
getattr(self, "parse_"+key)() diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 68fe4923..f37ca421 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -27,6 +27,7 @@ import socket import cStringIO as StringIO import Cookie +from .. import log, LOG_CHECK import linkcheck.url import linkcheck.strformat import linkcheck.robotparser2 @@ -197,8 +198,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): newurl = urlparse.urlunsplit(self.urlparts) if self.url != newurl: if self.warn_redirect: - linkcheck.log.warn(linkcheck.LOG_CHECK, - _("""URL %s has been redirected. + log.warn(LOG_CHECK, _("""URL %s has been redirected. Use URL %s instead for checking."""), self.url, newurl) self.url = newurl # check response @@ -230,10 +230,9 @@ Use URL %s instead for checking."""), self.url, newurl) raise if response.reason: response.reason = unicode_safe(response.reason) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, + log.debug(LOG_CHECK, "Response: %s %s", response.status, response.reason) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Headers: %s", self.headers) + log.debug(LOG_CHECK, "Headers: %s", self.headers) # proxy enforcement (overrides standard proxy) if response.status == 305 and self.headers: oldproxy = (self.proxy, self.proxyauth) @@ -260,8 +259,7 @@ Use URL %s instead for checking."""), self.url, newurl) continue raise if tries == -1: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "already handled") + log.debug(LOG_CHECK, "already handled") response.close() return None if tries >= self.max_redirects: @@ -281,7 +279,7 @@ Use URL %s instead for checking."""), self.url, newurl) _user, _password = self.get_user_password() self.auth = "Basic " + \ base64.encodestring("%s:%s" % (_user, _password)) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, + log.debug(LOG_CHECK, "Authentication %s/%s", _user, _password) continue elif response.status >= 400: 
@@ -312,8 +310,7 @@ Use URL %s instead for checking."""), self.url, newurl) """ Follow all redirections of http response. """ - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "follow all redirections") + log.debug(LOG_CHECK, "follow all redirections") redirected = self.url tries = 0 while response.status in [301, 302] and self.headers and \ @@ -323,15 +320,13 @@ Use URL %s instead for checking."""), self.url, newurl) # make new url absolute and unicode newurl = urlparse.urljoin(redirected, newurl) newurl = unicode_safe(newurl) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Redirected to %r", newurl) + log.debug(LOG_CHECK, "Redirected to %r", newurl) self.add_info(_("Redirected to %(url)s.") % {'url': newurl}) # norm base url - can raise UnicodeError from url.idna_encode() redirected, is_idn = linkcheck.checker.urlbase.url_norm(newurl) if is_idn: pass # XXX warn about idn use - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Norm redirected to %r", redirected) + log.debug(LOG_CHECK, "Norm redirected to %r", redirected) urlparts = linkcheck.strformat.url_unicode_split(redirected) # check extern filter again self.set_extern(redirected) @@ -458,8 +453,7 @@ Use URL %s instead for checking."""), self.url, newurl) else: host = self.urlparts[1] scheme = self.urlparts[0] - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Connecting to %r", host) + log.debug(LOG_CHECK, "Connecting to %r", host) # close/release a previous connection self.close_connection() self.url_connection = self.get_http_object(host, scheme) @@ -538,8 +532,7 @@ Use URL %s instead for checking."""), self.url, newurl) key = (scheme, self.urlparts[1], _user, _password) conn = self.aggregate.connections.get(key) if conn is not None: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "reuse cached HTTP(S) connection %s", conn) + log.debug(LOG_CHECK, "reuse cached HTTP(S) connection %s", conn) return conn self.aggregate.connections.wait_for_host(host) if 
scheme == "http": @@ -549,7 +542,7 @@ Use URL %s instead for checking."""), self.url, newurl) else: msg = _("Unsupported HTTP url scheme %r") % scheme raise linkcheck.LinkCheckerError(msg) - if linkcheck.log.is_debug(linkcheck.LOG_CHECK): + if log.is_debug(LOG_CHECK): h.set_debuglevel(1) h.connect() return h @@ -691,7 +684,7 @@ Use URL %s instead for checking."""), self.url, newurl) else: try: self.url_connection.close() - except: + except Exception: # ignore close errors pass self.url_connection = None diff --git a/linkcheck/checker/mailtourl.py b/linkcheck/checker/mailtourl.py index cf120d34..9cf40796 100644 --- a/linkcheck/checker/mailtourl.py +++ b/linkcheck/checker/mailtourl.py @@ -24,7 +24,7 @@ import smtplib import email.Utils import urlbase -import linkcheck.log +from .. import log, LOG_CHECK import linkcheck.strformat import linkcheck.dns.resolver from const import WARN_MAIL_NO_ADDRESSES, WARN_MAIL_NO_MX_HOST, \ @@ -75,8 +75,7 @@ class MailtoUrl (urlbase.UrlBase): username, domain = _split_address(addr) if not linkcheck.url.is_safe_domain(domain): raise linkcheck.LinkCheckerError(_("Invalid mail syntax")) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "addresses: %s", self.addresses) + log.debug(LOG_CHECK, "addresses: %s", self.addresses) def cutout_addresses (self): """ @@ -111,7 +110,7 @@ class MailtoUrl (urlbase.UrlBase): if i < (len(url) - 1): try: headers = cgi.parse_qs(url[(i+1):], strict_parsing=True) - for key, val in headers.iteritems(): + for key, val in headers.items(): self.headers.setdefault(key.lower(), []).extend(val) except ValueError, err: self.add_warning(_("Error parsing CGI values: %s") % str(err)) @@ -148,14 +147,10 @@ class MailtoUrl (urlbase.UrlBase): """ Check a single mail address. 
""" - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "checking mail address %r", mail) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "splitting address") + log.debug(LOG_CHECK, "checking mail address %r", mail) mail = linkcheck.strformat.ascii_safe(mail) username, domain = _split_address(mail) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "looking up MX mailhost %r", domain) + log.debug(LOG_CHECK, "looking up MX mailhost %r", domain) try: answers = linkcheck.dns.resolver.query(domain, 'MX') except linkcheck.dns.resolver.NoAnswer: @@ -180,10 +175,9 @@ class MailtoUrl (urlbase.UrlBase): # host should be preferred) mxdata.sort() # debug output - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "found %d MX mailhosts:", len(answers)) + log.debug(LOG_CHECK, "found %d MX mailhosts:", len(answers)) for preference, host in mxdata: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, + log.debug(LOG_CHECK, "MX host %r, preference %d", host, preference) # connect self.check_smtp_connect(mxdata, username, domain) @@ -200,19 +194,17 @@ class MailtoUrl (urlbase.UrlBase): smtpconnect = 0 for preference, host in mxdata: try: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, + log.debug(LOG_CHECK, "SMTP check for %r (preference %d)", host, preference) self.url_connection = smtplib.SMTP() - if linkcheck.log.is_debug(linkcheck.LOG_CHECK): + if log.is_debug(LOG_CHECK): self.url_connection.set_debuglevel(1) self.url_connection.connect(host) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "SMTP connected!") + log.debug(LOG_CHECK, "SMTP connected!") smtpconnect = 1 self.url_connection.helo() info = self.url_connection.verify("%s@%s" % (username, domain)) - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "SMTP user info %r", info) + log.debug(LOG_CHECK, "SMTP user info %r", info) d = {'info': str(info[1])} if info[0] == 250: self.add_info(_("Verified address: %(info)s.") % d) diff --git 
a/linkcheck/checker/nntpurl.py b/linkcheck/checker/nntpurl.py index 3ef5feee..96587326 100644 --- a/linkcheck/checker/nntpurl.py +++ b/linkcheck/checker/nntpurl.py @@ -26,7 +26,6 @@ import random import linkcheck import urlbase -import linkcheck.log from const import WARN_NNTP_NO_SERVER, WARN_NNTP_NO_NEWSGROUP, \ WARN_NNTP_BUSY diff --git a/linkcheck/checker/telneturl.py b/linkcheck/checker/telneturl.py index 21b53258..ffcf152e 100644 --- a/linkcheck/checker/telneturl.py +++ b/linkcheck/checker/telneturl.py @@ -22,7 +22,7 @@ import telnetlib import urllib import urlbase -import linkcheck.log +from .. import log, LOG_CHECK class TelnetUrl (urlbase.UrlBase): @@ -60,7 +60,7 @@ class TelnetUrl (urlbase.UrlBase): label is "login: ", expected password label is "Password: ". """ self.url_connection = telnetlib.Telnet() - if linkcheck.log.is_debug(linkcheck.LOG_CHECK): + if log.is_debug(LOG_CHECK): self.url_connection.set_debuglevel(1) self.url_connection.open(self.host, self.port) if self.user: diff --git a/linkcheck/checker/tests/__init__.py b/linkcheck/checker/tests/__init__.py index 6b69f74c..75ce1665 100644 --- a/linkcheck/checker/tests/__init__.py +++ b/linkcheck/checker/tests/__init__.py @@ -83,7 +83,7 @@ class TestLogger (linkcheck.logger.Logger): for warning in url_data.warnings: self.result.append(u"warning %s" % warning[1]) if self.has_part('result'): - self.result.append(url_data.valid and u"valid" or u"error") + self.result.append(u"valid" if url_data.valid else u"error") # note: do not append url_data.result since this is # platform dependent diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index ef9e0ffa..ad939a7c 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -37,7 +37,7 @@ import linkcheck.director import linkcheck.director.status import linkcheck.strformat import linkcheck.containers -import linkcheck.log +from .. 
import log, LOG_CHECK, LOG_CACHE import linkcheck.httplib2 import linkcheck.HtmlParser.htmlsax from const import WARN_URL_EFFECTIVE_URL, WARN_URL_UNICODE_DOMAIN, \ @@ -167,16 +167,14 @@ class UrlBase (object): Set result string and validity. """ if self.has_result: - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, "Double result %r (previous %r) for %s", msg, self.result, self) else: self.has_result = True if not isinstance(msg, unicode): - linkcheck.log.warn(linkcheck.LOG_CHECK, - "Non-unicode result for %s: %r", self, msg) + log.warn(LOG_CHECK, "Non-unicode result for %s: %r", self, msg) elif not msg: - linkcheck.log.warn(linkcheck.LOG_CHECK, - "Empty result for %s", self) + log.warn(LOG_CHECK, "Empty result for %s", self) self.result = msg self.valid = valid @@ -260,8 +258,7 @@ class UrlBase (object): # URLs with different anchors to have the same content self.cache_content_key = urlparse.urlunsplit(self.urlparts[:4]+[u'']) assert isinstance(self.cache_content_key, unicode), self - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, - "Content cache key %r", self.cache_content_key) + log.debug(LOG_CACHE, "Content cache key %r", self.cache_content_key) # construct cache key if self.aggregate.config["anchorcaching"] and \ self.aggregate.config["anchors"]: @@ -273,8 +270,7 @@ class UrlBase (object): # no anchor caching self.cache_url_key = self.cache_content_key assert isinstance(self.cache_url_key, unicode), self - assert None == linkcheck.log.debug(linkcheck.LOG_CACHE, - "URL cache key %r", self.cache_url_key) + log.debug(LOG_CACHE, "URL cache key %r", self.cache_url_key) def check_syntax (self): """ @@ -286,8 +282,7 @@ class UrlBase (object): @return: True if syntax is correct, else False. 
@rtype: bool """ - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "checking syntax") + log.debug(LOG_CHECK, "checking syntax") if self.base_url is None: self.set_result(_("URL is missing"), valid=False) return @@ -370,16 +365,15 @@ class UrlBase (object): if self.aggregate.config["trace"]: linkcheck.trace.trace_on() try: - try: - self.local_check() - except (socket.error, select.error): - # on Unix, ctrl-c can raise - # error: (4, 'Interrupted system call') - etype, value = sys.exc_info()[:2] - if etype == errno.EINTR: - raise KeyboardInterrupt(value) - else: - raise + self.local_check() + except (socket.error, select.error): + # on Unix, ctrl-c can raise + # error: (4, 'Interrupted system call') + etype, value = sys.exc_info()[:2] + if etype == errno.EINTR: + raise KeyboardInterrupt(value) + else: + raise finally: # close/release possible open connection self.close_connection() @@ -394,8 +388,7 @@ class UrlBase (object): def local_check (self): """Local check function can be overridden in subclasses.""" - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Checking %s", self) + log.debug(LOG_CHECK, "Checking %s", self) # start time for check check_start = time.time() self.set_extern(self.url) @@ -404,8 +397,7 @@ class UrlBase (object): return # check connection - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "checking connection") + log.debug(LOG_CHECK, "checking connection") try: self.check_connection() self.add_country_info() @@ -424,8 +416,7 @@ class UrlBase (object): # check content warningregex = self.aggregate.config["warningregex"] if warningregex and self.valid: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "checking content") + log.debug(LOG_CHECK, "checking content") try: self.check_content(warningregex) except tuple(linkcheck.checker.const.ExcList): @@ -458,7 +449,7 @@ class UrlBase (object): return try: self.url_connection.close() - except: + except Exception: # ignore close errors pass self.url_connection = 
None @@ -468,8 +459,7 @@ class UrlBase (object): An exception occurred. Log it and set the cache flag. """ etype, value, tb = sys.exc_info() - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "exception %s", traceback.format_tb(tb)) + log.debug(LOG_CHECK, "exception %s", traceback.format_tb(tb)) # note: etype must be the exact class, not a subclass if (etype in linkcheck.checker.const.ExcNoCacheList) or \ (etype == socket.error and value[0]==errno.EBADF) or \ @@ -494,36 +484,28 @@ class UrlBase (object): """ Return True iff we can recurse into the url's content. """ - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "checking recursion of %r ...", self.url) + log.debug(LOG_CHECK, "checking recursion of %r ...", self.url) # Test self.valid before self.is_parseable(). if not self.valid: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "... no, invalid.") + log.debug(LOG_CHECK, "... no, invalid.") return False if not self.is_parseable(): - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - ".. no, not parseable.") + log.debug(LOG_CHECK, "... no, not parseable.") return False if not self.can_get_content(): - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "... no, cannot get content.") + log.debug(LOG_CHECK, "... no, cannot get content.") return False rec_level = self.aggregate.config["recursionlevel"] if rec_level >= 0 and self.recursion_level >= rec_level: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "... no, maximum recursion level reached.") + log.debug(LOG_CHECK, "... no, maximum recursion level reached.") return False if self.extern[0]: - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "... no, extern.") + log.debug(LOG_CHECK, "... no, extern.") return False if not self.content_allows_robots(): - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "... no, robots.") + log.debug(LOG_CHECK, "... no, robots.") return False - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "... 
yes, recursion.") + log.debug(LOG_CHECK, "... yes, recursion.") return True def content_allows_robots (self): @@ -556,8 +538,7 @@ class UrlBase (object): self.can_get_content()): # do not bother return - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "checking anchor %r", self.anchor) + log.debug(LOG_CHECK, "checking anchor %r", self.anchor) handler = linkcheck.linkparse.LinkFinder(self.get_content(), tags={'a': [u'name'], None: [u'id']}) parser = linkcheck.HtmlParser.htmlsax.parser(handler) @@ -585,20 +566,17 @@ class UrlBase (object): match = entry['pattern'].search(url) if (entry['negate'] and not match) or \ (match and not entry['negate']): - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Extern URL %r", url) + log.debug(LOG_CHECK, "Extern URL %r", url) self.extern = (1, entry['strict']) return for entry in self.aggregate.config["internlinks"]: match = entry['pattern'].search(url) if (entry['negate'] and not match) or \ (match and not entry['negate']): - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Intern URL %r", url) + log.debug(LOG_CHECK, "Intern URL %r", url) self.extern = (0, 0) return - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Explicit extern URL %r", url) + log.debug(LOG_CHECK, "Explicit extern URL %r", url) self.extern = (1, 0) return @@ -649,8 +627,7 @@ class UrlBase (object): try: import tidy except ImportError: - linkcheck.log.warn(linkcheck.LOG_CHECK, - _("warning: tidy module is not available; " \ + log.warn(LOG_CHECK, _("warning: tidy module is not available; " \ "download from http://utidylib.berlios.de/")) return options = dict(output_html=0, show_warnings=1, quiet=True, @@ -660,11 +637,11 @@ class UrlBase (object): errors = filter_tidy_errors(doc.errors) for err in errors: self.add_warning("HTMLTidy: %s" % err) - except: + except Exception: # catch _all_ exceptions since we dont want third party module # errors to propagate into this library err = str(sys.exc_info()[1]) - 
linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("warning: tidy HTML parsing caused error: %s ") % err) def check_css (self): @@ -672,7 +649,7 @@ class UrlBase (object): try: import cssutils except ImportError: - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("warning: cssutils module is not available; " \ "download from http://cthedot.de/cssutils/")) return @@ -687,12 +664,11 @@ class UrlBase (object): cssparser.parseString(self.get_content(), href=self.url) for record in handler.storage: self.add_warning("cssutils: %s" % record.getMessage()) - except: - raise + except Exception: # catch _all_ exceptions since we dont want third party module # errors to propagate into this library err = str(sys.exc_info()[1]) - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("warning: cssutils parsing caused error: %s ") % err) def parse_url (self): @@ -717,8 +693,7 @@ class UrlBase (object): Parse into HTML content and search for URLs to check. Found URLs are added to the URL queue. """ - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Parsing HTML %s", self) + log.debug(LOG_CHECK, "Parsing HTML %s", self) # construct parser object handler = linkcheck.linkparse.LinkFinder(self.get_content()) parser = linkcheck.HtmlParser.htmlsax.parser(handler) @@ -744,8 +719,7 @@ class UrlBase (object): """ Parse an opera bookmark file. """ - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Parsing Opera bookmarks %s", self) + log.debug(LOG_CHECK, "Parsing Opera bookmarks %s", self) name = "" lineno = 0 for line in self.get_content().splitlines(): @@ -767,8 +741,7 @@ class UrlBase (object): Parse a text file with on url per line; comment and blank lines are ignored. 
""" - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Parsing text %s", self) + log.debug(LOG_CHECK, "Parsing text %s", self) lineno = 0 for line in self.get_content().splitlines(): lineno += 1 @@ -784,8 +757,7 @@ class UrlBase (object): """ Parse a CSS file for url() patterns. """ - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "Parsing CSS %s", self) + log.debug(LOG_CHECK, "Parsing CSS %s", self) lineno = 0 linkfinder = linkcheck.linkparse.css_url_re.finditer strip_comments = linkcheck.linkparse.strip_c_comments diff --git a/linkcheck/configuration/__init__.py b/linkcheck/configuration/__init__.py index 0372775a..e55e0a8f 100644 --- a/linkcheck/configuration/__init__.py +++ b/linkcheck/configuration/__init__.py @@ -23,7 +23,7 @@ import os import logging.config import urllib import _linkchecker_configdata -import linkcheck.log +from .. import log, LOG_CHECK, LOG import linkcheck.containers import confparse @@ -162,7 +162,7 @@ class Configuration (dict): logging.config.fileConfig(filename) handler = linkcheck.ansicolor.ColoredStreamHandler(strm=sys.stderr) handler.setFormatter(logging.Formatter("%(levelname)s %(message)s")) - logging.getLogger(linkcheck.LOG).addHandler(handler) + logging.getLogger(LOG).addHandler(handler) self.set_debug(debug) def set_debug (self, debug): @@ -218,8 +218,7 @@ class Configuration (dict): cfiles.append(path) # weed out invalid files cfiles = [f for f in cfiles if os.path.isfile(f)] - assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, - "reading configuration from %s", cfiles) + log.debug(LOG_CHECK, "reading configuration from %s", cfiles) confparse.LCConfigParser(self).read(cfiles) self.sanitize() diff --git a/linkcheck/configuration/confparse.py b/linkcheck/configuration/confparse.py index b71758bb..8d8018c9 100644 --- a/linkcheck/configuration/confparse.py +++ b/linkcheck/configuration/confparse.py @@ -18,7 +18,8 @@ import ConfigParser import re -import linkcheck.log +import linkcheck +from .. 
import log, LOG_CHECK def read_multiline (value): @@ -52,7 +53,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): self.read_authentication_config() self.read_filtering_config() except Exception, msg: - raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK, + raise linkcheck.LinkCheckerError( "Error parsing configuration: %s", str(msg)) def read_output_config (self): @@ -106,7 +107,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): if self.has_option(section, "timeout"): num = self.getint(section, "timeout") if num < 0: - raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK, + raise linkcheck.LinkCheckerError( _("invalid negative value for timeout: %d\n"), num) self.config['timeout'] = num if self.has_option(section, "anchors"): @@ -136,7 +137,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): try: import tidy except ImportError: - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("warning: tidy module is not available; " \ "download from http://utidylib.berlios.de/")) val = False @@ -147,7 +148,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): try: import cssutils except ImportError: - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("warning: cssutils module is not available; " \ "download from http://cthedot.de/cssutils/")) val = False @@ -160,7 +161,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): for val in read_multiline(self.get(section, "entry")): auth = val.split() if len(auth) != 3: - raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK, + raise linkcheck.LinkCheckerError(LOG_CHECK, _("missing auth part in entry %(val)r") % \ {"val": val}) self.config["authentication"].insert(0, @@ -175,11 +176,11 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): break val = self.get(section, key) auth = val.split() - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("the entry%(num)d syntax is deprecated; use " \ "the new multiline 
configuration syntax") % {"num": i}) if len(auth) != 3: - raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK, + raise linkcheck.LinkCheckerError(LOG_CHECK, _("missing auth part in entry %(val)r") % \ {"val": val}) self.config["authentication"].insert(0, @@ -204,7 +205,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): if not self.has_option(section, key): break val = self.get(section, key) - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("the nofollow%(num)d syntax is deprecated; use " \ "the new multiline configuration syntax") % {"num": i}) pat = linkcheck.get_link_pat(val, strict=0) @@ -219,7 +220,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): key = "noproxyfor%d" % i if not self.has_option(section, key): break - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("the noproxyfor%(num)d syntax is deprecated; use " \ "the new multiline configuration syntax") % {"num": i}) val = self.get(section, key) @@ -240,7 +241,7 @@ class LCConfigParser (ConfigParser.RawConfigParser, object): break # backwards compatibility: split and ignore second part val = self.get(section, key).split()[0] - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("the ignore%(num)d syntax is deprecated; use " \ "the new multiline configuration syntax") % {"num": i}) pat = linkcheck.get_link_pat(val, strict=1) diff --git a/linkcheck/containers.py b/linkcheck/containers.py index 4a49948f..4d4978a4 100644 --- a/linkcheck/containers.py +++ b/linkcheck/containers.py @@ -56,8 +56,7 @@ class SetList (list): class ListDict (dict): - """ - A dictionary whose iterators reflect the order in which elements + """A dictionary whose iterators reflect the order in which elements were added. 
""" @@ -128,8 +127,7 @@ class ListDict (dict): super(ListDict, self).clear() def get_true (self, key, default): - """ - Return default element if key is not in the dict, or if self[key] + """Return default element if key is not in the dict, or if self[key] evaluates to False. Useful for example if value is None, but default value should be an empty string. """ diff --git a/linkcheck/cookies.py b/linkcheck/cookies.py index 416dd052..27a87179 100644 --- a/linkcheck/cookies.py +++ b/linkcheck/cookies.py @@ -27,19 +27,18 @@ And a cookie storage class is provided. [2] http://www.faqs.org/rfcs/rfc2109.html """ +from __future__ import with_statement import time import re import Cookie import cookielib import cStringIO as StringIO import rfc822 -import strformat +from . import strformat class CookieError (StandardError): - """ - Thrown for invalid cookie syntax or conflicting/impossible values. - """ + """Thrown for invalid cookie syntax or conflicting/impossible values.""" pass @@ -66,11 +65,9 @@ CookiePattern = re.compile(r""" class HttpCookie (object): - """ - A cookie consists of one name-value pair with attributes. + """A cookie consists of one name-value pair with attributes. Each attribute consists of a predefined name (see attribute_names) - and a value (which is optional for some attributes). - """ + and a value (which is optional for some attributes).""" # A mapping from the lowercase variant on the left to the # appropriate traditional formatting on the right. @@ -132,10 +129,8 @@ class HttpCookie (object): self.name, self.value, attrs) def is_valid_for (self, scheme, host, port, path): - """ - Check validity of this cookie against the desired scheme, - host and path. 
- """ + """Check validity of this cookie against the desired scheme, + host and path.""" if self.check_expired() and \ self.check_domain(host) and \ self.check_port(port) and \ @@ -274,8 +269,8 @@ class HttpCookie (object): def server_header_value (self): parts = ["%s=%s" % (self.name, quote(self.value))] - parts += ["%s=%s"% (self.attribute_names[k], self.quote(k, v)) \ - for k, v in self.attributes.iteritems()] + parts.extend(["%s=%s"% (self.attribute_names[k], self.quote(k, v)) \ + for k, v in self.attributes.items()]) return "; ".join(parts) def client_header_value (self): @@ -283,15 +278,13 @@ class HttpCookie (object): if "version" in self.attributes: parts.append("$Version=%s" % quote(self.attributes["version"])) parts.append("%s=%s" % (self.name, quote(self.value))) - parts += ["$%s=%s"% (self.attribute_names[k], self.quote(k, v)) \ - for k, v in self.attributes.iteritems() if k != "version"] + parts.extend(["$%s=%s"% (self.attribute_names[k], self.quote(k, v)) \ + for k, v in self.attributes.items() if k != "version"]) return "; ".join(parts) class NetscapeCookie (HttpCookie): - """ - Parses RFC 2109 (Netscape) cookies. - """ + """Parses RFC 2109 (Netscape) cookies.""" def __init__ (self, text, scheme, host, path): self.parse(text) @@ -327,14 +320,12 @@ class Rfc2965Cookie (HttpCookie): def from_file (filename): - """ - Parse cookie data from a text file in HTTP header format. + """Parse cookie data from a text file in HTTP header format. @return: list of tuples (headers, scheme, host, path) """ entries = [] - fd = open(filename) - try: + with open(filename) as fd: lines = [] for line in fd.readlines(): line = line.rstrip() @@ -347,13 +338,10 @@ def from_file (filename): if lines: entries.append(from_headers("\r\n".join(lines))) return entries - finally: - fd.close() def from_headers (strheader): - """ - Parse cookie data from a string in HTTP header (RFC 822) format. + """Parse cookie data from a string in HTTP header (RFC 822) format. 
@return: tuple (headers, scheme, host, path) @raises: ValueError for incomplete or invalid data diff --git a/linkcheck/decorators.py b/linkcheck/decorators.py index 7994aaf5..dd1c42c3 100644 --- a/linkcheck/decorators.py +++ b/linkcheck/decorators.py @@ -35,6 +35,7 @@ def h (): pass """ +from __future__ import with_statement import warnings import signal import os @@ -43,8 +44,7 @@ import time def update_func_meta (fake_func, real_func): - """ - Set meta information (eg. __doc__) of fake function to that + """Set meta information (eg. __doc__) of fake function to that of the real function. @return fake_func """ @@ -56,14 +56,10 @@ def update_func_meta (fake_func, real_func): def deprecated (func): - """ - A decorator which can be used to mark functions as deprecated. - It emits a warning when the function is called. - """ + """A decorator which can be used to mark functions as deprecated. + It emits a warning when the function is called.""" def newfunc (*args, **kwargs): - """ - Print deprecated warning and execute original function. - """ + """Print deprecated warning and execute original function.""" warnings.warn("Call to deprecated function %s." % func.__name__, category=DeprecationWarning) return func(*args, **kwargs) @@ -71,8 +67,7 @@ def deprecated (func): def signal_handler (signal_number): - """ - From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/410666 + """From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/410666 A decorator to set the specified function as handler for a signal. This function is the 'outer' decorator, called with only the @@ -82,9 +77,7 @@ def signal_handler (signal_number): """ # create the 'real' decorator which takes only a function as an argument def newfunc (function): - """ - Register function as signal handler. 
- """ + """Register function as signal handler.""" # note: actually the kill(2) function uses the signal number of 0 # for a special case, but for signal(2) only positive integers # are allowed @@ -96,49 +89,32 @@ def signal_handler (signal_number): def synchronize (lock, func): - """ - Return synchronized function acquiring the given lock. - """ + """Return synchronized function acquiring the given lock.""" def newfunc (*args, **kwargs): - """ - Execute function synchronized. - """ - lock.acquire() - try: + """Execute function synchronized.""" + with lock: return func(*args, **kwargs) - finally: - lock.release() return update_func_meta(newfunc, func) def synchronized (lock): - """ - A decorator calling a function with aqcuired lock. - """ + """A decorator calling a function with aqcuired lock.""" return lambda func: synchronize(lock, func) def notimplemented (func): - """ - Raises a NotImplementedError if the function is called. + """Raises a NotImplementedError if the function is called.""" def newfunc (*args, **kwargs): - """ - def newfunc (*args, **kwargs): - """ - Raise NotImplementedError - """ + """Raise NotImplementedError""" raise NotImplementedError("%s not implemented" % func.__name__) return update_func_meta(newfunc, func) def timeit (func, log, limit): - """ - Print execution time of the function. For quick'n'dirty profiling. - """ + """Print execution time of the function. For quick'n'dirty profiling.""" + def newfunc (*args, **kwargs): - """ - Execute function and print execution time. - """ + """Execute function and print execution time.""" t = time.time() res = func(*args, **kwargs) duration = time.time() - t @@ -155,11 +131,10 @@ def timed (log=sys.stderr, limit=2.0): class memoized (object): - """ - Decorator that caches a function's return value each time it is called. + """Decorator that caches a function's return value each time it is called. If called later with the same arguments, the cached value is returned, and - not re-evaluated. 
- """ + not re-evaluated.""" + def __init__(self, func): self.func = func self.cache = {} @@ -181,11 +156,9 @@ class memoized (object): class curried (object): - """ - Decorator that returns a function that keeps returning functions + """Decorator that returns a function that keeps returning functions until all arguments are supplied; then the original function is - evaluated. - """ + evaluated.""" def __init__(self, func, *a): self.func = func self.args = a diff --git a/linkcheck/director/__init__.py b/linkcheck/director/__init__.py index 5fee6b07..54039e16 100644 --- a/linkcheck/director/__init__.py +++ b/linkcheck/director/__init__.py @@ -18,8 +18,9 @@ Management of checking a queue of links with several threads. """ import time +import os import thread -import linkcheck.log +from .. import log, LOG_CHECK import linkcheck.cache.urlqueue import linkcheck.cache.robots_txt import linkcheck.cache.cookie @@ -43,11 +44,11 @@ def check_urls (aggregate): except KeyboardInterrupt: interrupt(aggregate) except thread.error: - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("Could not start a new thread. 
Check that the current user" \ " is allowed to start new threads.")) abort(aggregate) - except: + except Exception: console.internal_error() abort(aggregate) @@ -72,9 +73,9 @@ def interrupt (aggregate): interrupts.""" while True: try: - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("keyboard interrupt; waiting for active threads to finish")) - linkcheck.log.warn(linkcheck.LOG_CHECK, + log.warn(LOG_CHECK, _("another keyboard interrupt will exit immediately")) print_active_threads(aggregate) abort(aggregate) @@ -86,11 +87,11 @@ def interrupt (aggregate): def print_active_threads (aggregate): if not aggregate.threads: return - linkcheck.log.info(linkcheck.LOG_CHECK, _("These URLs are still active:")) + log.info(LOG_CHECK, _("These URLs are still active:")) for t in aggregate.threads: name = t.getName() if name.startswith("Check-"): - linkcheck.log.info(linkcheck.LOG_CHECK, name[6:]) + log.info(LOG_CHECK, name[6:]) def abort (aggregate): @@ -102,20 +103,9 @@ def abort (aggregate): aggregate.logger.end_log_output() break except KeyboardInterrupt: - linkcheck.log.warn(linkcheck.LOG_CHECK, _("keyboard interrupt; force shutdown")) - force_shutdown() - - -def force_shutdown (): - """Force shutdown, not finishing anything.""" - import os - if os.name == "posix": - # POSIX systems seem to do fine with sys.exit() - import sys - sys.exit(1) - else: - # forced exit without cleanup - os._exit(1) + log.warn(LOG_CHECK, _("keyboard interrupt; force shutdown")) + # forced exit without cleanup + os._exit(1) def get_aggregate (config): diff --git a/linkcheck/director/aggregator.py b/linkcheck/director/aggregator.py index 1d0c01a2..16242463 100644 --- a/linkcheck/director/aggregator.py +++ b/linkcheck/director/aggregator.py @@ -17,7 +17,7 @@ """ Aggregate needed object instances for checker threads. """ -import linkcheck.log +from .. 
import log, LOG_CHECK import linkcheck.director import logger import status @@ -61,7 +61,7 @@ class Aggregate (object): try: self.urlqueue.join(timeout=self.config["timeout"]) except linkcheck.cache.urlqueue.Timeout: - linkcheck.log.warn(linkcheck.LOG_CHECK, "Abort timed out") + log.warn(LOG_CHECK, "Abort timed out") def remove_stopped_threads (self): "Remove the stopped threads from the internal thread list.""" @@ -74,5 +74,5 @@ class Aggregate (object): t.stop() t.join(2) if t.isAlive(): - linkcheck.log.warn(linkcheck.LOG_CHECK, "Thread %s still active", t) + log.warn(LOG_CHECK, "Thread %s still active", t) self.connections.clear() diff --git a/linkcheck/director/task.py b/linkcheck/director/task.py index b6d35913..849ad762 100644 --- a/linkcheck/director/task.py +++ b/linkcheck/director/task.py @@ -16,7 +16,7 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. import thread import linkcheck.decorators -import linkcheck.log +from .. import log, LOG_CHECK import linkcheck.threader import console @@ -29,10 +29,9 @@ class CheckedTask (linkcheck.threader.StoppableThread): try: self.run_checked() except KeyboardInterrupt: - linkcheck.log.warn(linkcheck.LOG_CHECK, - "interrupt did not reach the main thread") + log.warn(LOG_CHECK, "interrupt did not reach the main thread") thread.interrupt_main() - except: + except Exception: console.internal_error() @linkcheck.decorators.notimplemented diff --git a/linkcheck/dns/ifconfig.py b/linkcheck/dns/ifconfig.py index 0aa82518..30abaf51 100644 --- a/linkcheck/dns/ifconfig.py +++ b/linkcheck/dns/ifconfig.py @@ -7,7 +7,7 @@ import errno import array import fcntl import struct -import linkcheck.log +from .. 
import log, LOG_DNS class IfConfig (object): @@ -49,7 +49,7 @@ class IfConfig (object): try: result = self._ioctl(func, ifreq) except IOError, msg: - linkcheck.log.warn(linkcheck.LOG_DNS, + log.warn(LOG_DNS, "error getting addr for interface %r: %s", ifname, msg) return None return socket.inet_ntoa(result[20:24]) @@ -97,7 +97,7 @@ class IfConfig (object): try: result = self._ioctl(self.SIOCGIFFLAGS, ifreq) except IOError, msg: - linkcheck.log.warn(linkcheck.LOG_DNS, + log.warn(LOG_DNS, "error getting flags for interface %r: %s", ifname, msg) return 0 # extract the interface's flags from the return value diff --git a/linkcheck/dns/resolver.py b/linkcheck/dns/resolver.py index 096fe811..8e9939a4 100644 --- a/linkcheck/dns/resolver.py +++ b/linkcheck/dns/resolver.py @@ -25,7 +25,7 @@ import sys import time import encodings.idna -import linkcheck.log +from .. import log, LOG_DNS import linkcheck.dns.exception import linkcheck.dns.message import linkcheck.dns.name @@ -727,8 +727,7 @@ def query(qname, rdtype=linkcheck.dns.rdatatype.A, rdclass=linkcheck.dns.rdatacl object to make the query. @see: L{linkcheck.dns.resolver.Resolver.query} for more information on the parameters.""" - assert None == linkcheck.log.debug(linkcheck.LOG_DNS, - "Query %s %s %s", qname, rdtype, rdclass) + log.debug(LOG_DNS, "Query %s %s %s", qname, rdtype, rdclass) if resolver is None: resolver = get_default_resolver() return resolver.query(qname, rdtype, rdclass, tcp) diff --git a/linkcheck/dns/tests/test_zone.py b/linkcheck/dns/tests/test_zone.py index 1a4d980d..63f5717b 100644 --- a/linkcheck/dns/tests/test_zone.py +++ b/linkcheck/dns/tests/test_zone.py @@ -14,6 +14,7 @@ # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT # OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+from __future__ import with_statement import cStringIO as StringIO import os import unittest @@ -108,12 +109,9 @@ ns2 1w1D1h1m1S a 10.0.0.2 def get_file_lines (fn): res = [] - fd = open(fn) - try: + with open(fn) as fd: for line in fd: res.append(line) - finally: - fd.close() return res diff --git a/linkcheck/dns/zone.py b/linkcheck/dns/zone.py index 814d43c9..5216885f 100644 --- a/linkcheck/dns/zone.py +++ b/linkcheck/dns/zone.py @@ -127,22 +127,22 @@ class Zone(object): del self.nodes[key] def __iter__(self): - return self.nodes.iterkeys() + return self.nodes.keys() def iterkeys(self): - return self.nodes.iterkeys() + return self.nodes.keys() def keys(self): return self.nodes.keys() def itervalues(self): - return self.nodes.itervalues() + return self.nodes.values() def values(self): return self.nodes.values() def iteritems(self): - return self.nodes.iteritems() + return self.nodes.items() def items(self): return self.nodes.items() @@ -419,7 +419,7 @@ class Zone(object): rdtype = linkcheck.dns.rdatatype.from_text(rdtype) if isinstance(covers, str): covers = linkcheck.dns.rdatatype.from_text(covers) - for (name, node) in self.iteritems(): + for (name, node) in self.items(): for rds in node: if rdtype == linkcheck.dns.rdatatype.ANY or \ (rds.rdtype == rdtype and rds.covers == covers): @@ -442,7 +442,7 @@ class Zone(object): rdtype = linkcheck.dns.rdatatype.from_text(rdtype) if isinstance(covers, str): covers = linkcheck.dns.rdatatype.from_text(covers) - for (name, node) in self.iteritems(): + for (name, node) in self.items(): for rds in node: if rdtype == linkcheck.dns.rdatatype.ANY or \ (rds.rdtype == rdtype and rds.covers == covers): @@ -486,7 +486,7 @@ class Zone(object): names = self.keys() names.sort() else: - names = self.iterkeys() + names = self.keys() for n in names: l = self[n].to_text(n, origin=self.origin, relativize=relativize) diff --git a/linkcheck/dummy.py b/linkcheck/dummy.py index ed50d846..8dfd22ae 100644 --- a/linkcheck/dummy.py +++ 
b/linkcheck/dummy.py @@ -19,9 +19,7 @@ Dummy objects. """ class Dummy (object): - """ - A dummy object ignores all access to it. Useful for testing. - """ + """A dummy object ignores all access to it. Useful for testing.""" def __init__ (self, *args, **kwargs): pass @@ -64,7 +62,5 @@ class Dummy (object): def dummy (*args, **kwargs): - """ - Ignore any positional or keyword arguments, return None. - """ + """Ignore any positional or keyword arguments, return None.""" pass diff --git a/linkcheck/fcgi.py b/linkcheck/fcgi.py index d9f9e96d..919b6fd7 100644 --- a/linkcheck/fcgi.py +++ b/linkcheck/fcgi.py @@ -88,32 +88,24 @@ error = 'fcgi.error' # anywhere at the moment def _error (msg): - """ - Append a string to /tmp/err. - """ + """Append a string to /tmp/err.""" errf = file('/tmp/err', 'a+') errf.write(msg+'\n') errf.close() class Record (object): - """ - Class representing FastCGI records. - """ + """Class representing FastCGI records.""" def __init__ (self): - """ - Initialize record data. - """ + """Initialize record data.""" self.version = FCGI_VERSION_1 self.rec_type = FCGI_UNKNOWN_TYPE self.req_id = FCGI_NULL_REQUEST_ID self.content = "" def read_record (self, sock): - """ - Read a FastCGI record from socket. - """ + """Read a FastCGI record from socket.""" s = [ord(x) for x in sock.recv(8)] self.version, self.rec_type, padding_length = s[0], s[1], s[6] self.req_id, content_length = (s[2]<<8)+s[3], (s[4]<<8)+s[5] @@ -145,9 +137,7 @@ class Record (object): self.protocolStatus = ord(c[4]) def write_record (self, sock): - """ - Write a FastCGI request to socket. 
- """ + """Write a FastCGI request to socket.""" content = self.content if self.rec_type == FCGI_BEGIN_REQUEST: content = chr(self.role>>8) + chr(self.role & 255) + \ @@ -178,7 +168,7 @@ class Record (object): c_len & 255, pad_len, 0] - hdr = ''.join([chr(x) for x in hdr]) + hdr = ''.join(chr(x) for x in hdr) sock.send(hdr + content + pad_len*'\000') @@ -231,79 +221,59 @@ def HandleManTypes (r, conn): class FastCGIWriter (object): - """ - File-like object writing FastCGI requests. All read operations - return empty data. - """ + """File-like object writing FastCGI requests. All read operations + return empty data.""" def __init__ (self, rec, conn): - """ - Initialize with given record and connection. - """ + """Initialize with given record and connection.""" self.record = rec self.conn = conn self.closed = False def close (self): - """ - Close this writer. - """ + """Close this writer.""" if not self.closed: self.closed = True self.record.content = "" self.record.write_record(self.conn) def isatty (self): - """ - Returns False. - """ + """Returns False.""" if self.closed: raise ValueError, "I/O operation on closed file" return False def seek (self, pos, mode=0): - """ - Does nothing. - """ + """Does nothing.""" if self.closed: raise ValueError, "I/O operation on closed file" def tell (self): - """ - Return zero. - """ + """Return zero.""" if self.closed: raise ValueError, "I/O operation on closed file" return 0 def read (self, n=-1): - """ - Return empty string. - """ + """Return empty string.""" if self.closed: raise ValueError, "I/O operation on closed file" return "" def readline (self, length=None): - """ - Return empty string. - """ + """Return empty string.""" if self.closed: raise ValueError, "I/O operation on closed file" return "" def readlines (self): - """ - Return empty list. 
- """ + """Return empty list.""" if self.closed: raise ValueError, "I/O operation on closed file" return [] def write (self, s): - """ - Write data in record for record to connection. - """ + """Write data in record for record to connection.""" if self.closed: raise ValueError, "I/O operation on closed file" while s: @@ -312,23 +282,17 @@ class FastCGIWriter (object): self.record.write_record(self.conn) def get_next_chunk (self, data): - """ - Return tuple (chunk of data, newdata). - """ + """Return tuple (chunk of data, newdata).""" chunk = data[:8192] data = data[8192:] return chunk, data def writelines (self, lines): - """ - Write given lines to the connection. - """ + """Write given lines to the connection.""" self.write(''.join(lines)) def flush (self): - """ - Does nothing. - """ + """Raises ValueError if called with closed file.""" if self.closed: raise ValueError, "I/O operation on closed file" diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py index 475e2d37..9b24fb45 100644 --- a/linkcheck/fileutil.py +++ b/linkcheck/fileutil.py @@ -25,8 +25,7 @@ import fnmatch def write_file (filename, content, backup=False, callback=None): - """ - Overwrite a possibly existing file with new content. Do this + """Overwrite a possibly existing file with new content. Do this in a manner that does not leave truncated or broken files behind. @param filename: name of file to write @type filename: string @@ -55,8 +54,7 @@ def write_file (filename, content, backup=False, callback=None): def has_module (name): - """ - Test if given module can be imported. + """Test if given module can be imported. @return: flag if import is successful @rtype: bool """ @@ -68,23 +66,17 @@ def has_module (name): class GlobDirectoryWalker (object): - """ - A forward iterator that traverses a directory tree. - """ + """A forward iterator that traverses a directory tree.""" def __init__ (self, directory, pattern="*"): - """ - Set start directory and pattern matcher. 
- """ + """Set start directory and pattern matcher.""" self.stack = [directory] self.pattern = pattern self.files = [] self.index = 0 def __getitem__ (self, index): - """ - Search for next filename. - """ + """Search for next filename.""" while True: try: filename = self.files[self.index] @@ -108,35 +100,25 @@ rglob = GlobDirectoryWalker class Buffer (object): - """ - Holds buffered data - """ + """Holds buffered data""" def __init__ (self, empty=''): - """ - Initialize buffer. - """ + """Initialize buffer.""" self.empty = self.buf = empty self.tmpbuf = [] self.pos = 0 def __len__ (self): - """ - Buffer length. - """ + """Buffer length.""" return self.pos def write (self, data): - """ - Write data to buffer. - """ + """Write data to buffer.""" self.tmpbuf.append(data) self.pos += len(data) def flush (self, overlap=0): - """ - Flush buffered data and return it. - """ + """Flush buffered data and return it.""" self.buf += self.empty.join(self.tmpbuf) self.tmpbuf = [] if overlap and overlap < self.pos: @@ -149,9 +131,7 @@ class Buffer (object): def get_mtime (filename): - """ - Return modification time of filename or zero on errors. - """ + """Return modification time of filename or zero on errors.""" try: return os.stat(filename)[stat.ST_MTIME] except os.error: @@ -177,10 +157,8 @@ def pathencode (path): # cache for modified check {absolute filename -> mtime} _mtime_cache = {} def has_changed (filename): - """ - Check if filename has changed since the last check. If this - is the first check, assume the file is changed. - """ + """Check if filename has changed since the last check. If this + is the first check, assume the file is changed.""" key = os.path.abspath(filename) mtime = get_mtime(key) if key not in _mtime_cache: diff --git a/linkcheck/httplib2.py b/linkcheck/httplib2.py index 35c8a310..4ce155a1 100644 --- a/linkcheck/httplib2.py +++ b/linkcheck/httplib2.py @@ -863,7 +863,7 @@ class HTTPConnection: print "Cannot stat!!" 
if thelen is not None: self.putheader('Content-Length',thelen) - for hdr, value in headers.iteritems(): + for hdr, value in headers.items(): self.putheader(hdr, value) self.endheaders() diff --git a/linkcheck/i18n.py b/linkcheck/i18n.py index a9bd4cfe..6b2411d8 100644 --- a/linkcheck/i18n.py +++ b/linkcheck/i18n.py @@ -33,9 +33,7 @@ if default_encoding is None: default_encoding = "ascii" def install_builtin (translator, do_unicode): - """ - Install _() and _n() gettext methods into default namespace. - """ + """Install _() and _n() gettext methods into default namespace.""" import __builtin__ if do_unicode: __builtin__.__dict__['_'] = translator.ugettext @@ -47,36 +45,26 @@ def install_builtin (translator, do_unicode): __builtin__.__dict__['_n'] = translator.ngettext class Translator (gettext.GNUTranslations): - """ - A translation class always installing its gettext methods into the - default namespace. - """ + """A translation class always installing its gettext methods into the + default namespace.""" def install (self, do_unicode): - """ - Install gettext methods into the default namespace. - """ + """Install gettext methods into the default namespace.""" install_builtin(self, do_unicode) class NullTranslator (gettext.NullTranslations): - """ - A dummy translation class always installing its gettext methods into - the default namespace. - """ + """A dummy translation class always installing its gettext methods into + the default namespace.""" def install (self, do_unicode): - """ - Install gettext methods into the default namespace. - """ + """Install gettext methods into the default namespace.""" install_builtin(self, do_unicode) def init (domain, directory): - """ - Initialize this gettext i18n module. Searches for supported languages - and installs the gettext translator class. - """ + """Initialize this gettext i18n module. 
Searches for supported languages + and installs the gettext translator class.""" global default_language, default_encoding if os.path.isdir(directory): # get supported languages @@ -98,9 +86,7 @@ def init (domain, directory): def get_translator (domain, directory, languages=None, translatorklass=Translator, fallback=False, fallbackklass=NullTranslator): - """ - Search the appropriate GNUTranslations class. - """ + """Search the appropriate GNUTranslations class.""" translator = gettext.translation(domain, localedir=directory, languages=languages, class_=translatorklass, fallback=fallback) if not isinstance(translator, gettext.GNUTranslations) and fallbackklass: @@ -109,18 +95,14 @@ def get_translator (domain, directory, languages=None, def get_lang (lang): - """ - Return lang if it is supported, or the default language. - """ + """Return lang if it is supported, or the default language.""" if lang in supported_languages: return lang return default_language def get_headers_lang (headers): - """ - Return preferred supported language in given HTTP headers. - """ + """Return preferred supported language in given HTTP headers.""" if 'Accept-Language' not in headers: return default_language languages = headers['Accept-Language'].split(",") @@ -145,17 +127,10 @@ def get_headers_lang (headers): def get_locale (): - """ - Return current configured locale. - """ + """Return current configured locale.""" loc = None encoding = 'ascii' - try: - loc, encoding = locale.getlocale(category=locale.LC_ALL) - except ValueError: - # XXX ignore Python bug - # http://bugs.python.org/issue1158909 - pass + loc, encoding = locale.getlocale(category=locale.LC_ALL) if loc is None: return ('C', 'ascii') loc = locale.normalize(loc) @@ -182,14 +157,10 @@ lang_transis = { } def lang_name (lang): - """ - Return full name of given language. - """ + """Return full name of given language.""" return lang_names[lang] def lang_trans (lang, curlang): - """ - Return translated full name of given language. 
- """ + """Return translated full name of given language.""" return lang_transis[lang][curlang] diff --git a/linkcheck/lc_cgi.py b/linkcheck/lc_cgi.py index 3d19bd7d..e0205471 100644 --- a/linkcheck/lc_cgi.py +++ b/linkcheck/lc_cgi.py @@ -44,25 +44,19 @@ lang_locale = { _is_level = re.compile(r'^(0|1|2|3|-1)$').match class FormError (StandardError): - """ - Form related errors. - """ + """Form related errors.""" pass def startoutput (out=sys.stdout): - """ - Print leading HTML headers to given output stream. - """ + """Print leading HTML headers to given output stream.""" out.write("Content-type: text/html\r\n" "Cache-Control: no-cache\r\n" "Pragma: no-cache\r\n" "\r\n") def checkaccess (out=sys.stdout, hosts=None, servers=None, env=os.environ): - """ - See if remote addr is allowed to access the CGI interface. - """ + """See if remote addr is allowed to access the CGI interface.""" if hosts is None: hosts = [] if servers is None: @@ -76,9 +70,7 @@ def checkaccess (out=sys.stdout, hosts=None, servers=None, env=os.environ): def checklink (out=sys.stdout, form=None, env=os.environ): - """ - Main cgi function, check the given links and print out the result. - """ + """Main cgi function, check the given links and print out the result.""" if form is None: form = {} try: @@ -115,18 +107,14 @@ def checklink (out=sys.stdout, form=None, env=os.environ): def get_host_name (form): - """ - Return host name of given URL. - """ + """Return host name of given URL.""" return urlparse.urlparse(form["url"].value)[1] def checkform (form): - """ - Check form data. throw exception on error + """Check form data. throw exception on error Be sure to NOT print out any user-given data as HTML code, so use - only plain strings as exception text. 
- """ + only plain strings as exception text.""" # check lang support if "language" in form: lang = form['language'].value @@ -156,9 +144,7 @@ def checkform (form): raise FormError(_("invalid %s option syntax") % option) def logit (form, env): - """ - Log form errors. - """ + """Log form errors.""" global _logfile if not _logfile: return @@ -175,9 +161,7 @@ def logit (form, env): def print_error (out, why): - """ - Print standard error page. - """ + """Print standard error page.""" out.write(_("""