diff --git a/linkcheck/AnsiColor.py b/linkcheck/AnsiColor.py
deleted file mode 100644
index 743e1066..00000000
--- a/linkcheck/AnsiColor.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""ANSI Color definitions and functions"""
-# Copyright (C) 2000-2004 Bastian Kleineidam
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-import os
-import sys
-
-# Escape for ANSI colors
-AnsiEsc = "\x1b[%sm"
-
-# type numbers
-AnsiType = {
- 'bold': '1',
- 'light': '2',
- 'blink': '5',
- 'invert': '7',
-}
-
-# color numbers (the capitalized colors are bright)
-AnsiColor = {
- 'default': '0',
- 'black': '30',
- 'red': '31',
- 'green': '32',
- 'yellow': '33',
- 'blue': '34',
- 'purple': '35',
- 'cyan': '36',
- 'white': '37',
- 'Black': '40',
- 'Red': '41',
- 'Green': '42',
- 'Yellow': '43',
- 'Blue': '44',
- 'Purple': '45',
- 'Cyan': '46',
- 'White': '47',
-}
-
-
-def esc_ansicolor (color):
- """convert a named color definition to an escaped ANSI color"""
- ctype = ''
- if ";" in color:
- ctype, color = color.split(";", 1)
- if not AnsiType.has_key(ctype):
- print >>sys.stderr, "invalid ANSI color type", repr(ctype)
- print >>sys.stderr, "valid values are", AnsiType.keys()
- ctype = ''
- else:
- ctype = AnsiType[ctype]+";"
- if not AnsiColor.has_key(color):
- print >>sys.stderr, "invalid ANSI color name", repr(color)
- print >>sys.stderr, "valid values are", AnsiColor.keys()
- cnum = '0'
- else:
- cnum = AnsiColor[color]
- return AnsiEsc % (ctype+cnum)
-
-AnsiReset = esc_ansicolor("default")
-
-
-def colorize (text, color=None):
- "return text colorized if TERM is set"
- if (color is not None) and os.environ.get('TERM'):
- color = esc_ansicolor(color)
- return '%s%s%s' % (color, text, AnsiReset)
- else:
- return text
diff --git a/linkcheck/Config.py b/linkcheck/Config.py
index 25f1f15a..22173ecf 100644
--- a/linkcheck/Config.py
+++ b/linkcheck/Config.py
@@ -24,10 +24,11 @@ import Cookie
import sets
import urllib
import _linkchecker_configdata
+import bk
+import bk.containers
import linkcheck
import linkcheck.i18n
-import linkcheck.log
-
+import linkcheck.Threader
try:
import threading
except ImportError:
@@ -69,7 +70,7 @@ def _check_morsel (m, host, path):
return None
# check expiry date (if its stored)
if m["expires"]:
- debug(BRING_IT_ON, "Cookie expires", m["expires"])
+ bk.log.debug(linkcheck.LOG_CHECK, "Cookie expires", m["expires"])
# XXX
return m.output(header='').strip()
@@ -90,7 +91,6 @@ class Configuration (dict):
# reduceThreads(). Ok, this is a hack but ItWorksForMe(tm).
self.reduceCount = 0
-
def reset (self):
"""Reset to default values"""
self['linknumber'] = 0
@@ -174,55 +174,47 @@ class Configuration (dict):
self.setThreads(10)
self.urlSeen = sets.Set()
self.urlSeenLock = threading.Lock()
- self.urlCache = linkcheck.containers.LRU(MAX_URL_CACHE)
+ self.urlCache = bk.containers.LRU(MAX_URL_CACHE)
self.urlCacheLock = threading.Lock()
- self.robotsTxtCache = linkcheck.containers.LRU(MAX_ROBOTS_TXT_CACHE)
+ self.robotsTxtCache = bk.containers.LRU(MAX_ROBOTS_TXT_CACHE)
self.robotsTxtCacheLock = threading.Lock()
self.urls = []
self.urlCounter = 0
self.urlsLock = threading.Lock()
# basic data lock (eg for cookies, link numbers etc.)
self.dataLock = threading.Lock()
- self.cookies = linkcheck.containers.LRU(MAX_COOKIES_CACHE)
-
+ self.cookies = bk.containers.LRU(MAX_COOKIES_CACHE)
def setThreads (self, num):
- debug(HURT_ME_PLENTY, "set threading with %d threads"%num)
+ bk.log.debug(linkcheck.LOG_CHECK, "set threading with %d threads"%num)
self.threader.threads_max = num
if num>0:
sys.setcheckinterval(50)
else:
sys.setcheckinterval(100)
-
def newLogger (self, logtype, dict={}):
args = {}
args.update(self[logtype])
args.update(dict)
- from linkcheck.log import Loggers
- return Loggers[logtype](**args)
-
+ return linkcheck.Loggers[logtype](**args)
def addLogger(self, logtype, loggerClass, logargs={}):
"add a new logger type"
- from linkcheck.log import Loggers
- Loggers[logtype] = loggerClass
+ linkcheck.Loggers[logtype] = loggerClass
self[logtype] = logargs
-
def log_init (self):
if not self["quiet"]: self["log"].init()
for log in self["fileoutput"]:
log.init()
-
def log_endOfOutput (self):
if not self["quiet"]:
self["log"].endOfOutput(linknumber=self['linknumber'])
for log in self["fileoutput"]:
log.endOfOutput(linknumber=self['linknumber'])
-
def incrementLinknumber (self):
try:
self.dataLock.acquire()
@@ -230,19 +222,15 @@ class Configuration (dict):
finally:
self.dataLock.release()
-
def hasMoreUrls (self):
return self.urls
-
def finished (self):
return self.threader.finished() and not self.urls
-
def finish (self):
self.threader.finish()
-
def appendUrl (self, urlData):
self.urlsLock.acquire()
try:
@@ -260,7 +248,6 @@ class Configuration (dict):
finally:
self.urlsLock.release()
-
def filterUrlQueue (self):
"""remove already cached urls from queue"""
# note: url lock must be acquired
@@ -270,7 +257,6 @@ class Configuration (dict):
print >>sys.stderr, \
i18n._("removed %d cached urls from incoming queue")%removed
-
def getUrl (self):
"""get first url in queue and return it"""
self.urlsLock.acquire()
@@ -281,11 +267,9 @@ class Configuration (dict):
finally:
self.urlsLock.release()
-
def checkUrl (self, url):
self.threader.start_thread(url.check, ())
-
def urlSeen_has_key (self, key):
self.urlSeenLock.acquire()
try:
@@ -293,7 +277,6 @@ class Configuration (dict):
finally:
self.urlSeenLock.release()
-
def urlSeen_set (self, key):
self.urlSeenLock.acquire()
try:
@@ -301,7 +284,6 @@ class Configuration (dict):
finally:
self.urlSeenLock.release()
-
def urlCache_has_key (self, key):
self.urlCacheLock.acquire()
try:
@@ -309,7 +291,6 @@ class Configuration (dict):
finally:
self.urlCacheLock.release()
-
def urlCache_get (self, key):
self.urlCacheLock.acquire()
try:
@@ -317,16 +298,14 @@ class Configuration (dict):
finally:
self.urlCacheLock.release()
-
def urlCache_set (self, key, val):
self.urlCacheLock.acquire()
try:
- debug(NIGHTMARE, "caching", repr(key))
+ bk.log.debug(linkcheck.LOG_CHECK, "caching", repr(key))
self.urlCache[key] = val
finally:
self.urlCacheLock.release()
-
def robotsTxtCache_has_key (self, key):
self.robotsTxtCacheLock.acquire()
try:
@@ -334,7 +313,6 @@ class Configuration (dict):
finally:
self.robotsTxtCacheLock.release()
-
def robotsTxtCache_get (self, key):
self.robotsTxtCacheLock.acquire()
try:
@@ -342,7 +320,6 @@ class Configuration (dict):
finally:
self.robotsTxtCacheLock.release()
-
def robotsTxtCache_set (self, key, val):
self.robotsTxtCacheLock.acquire()
try:
@@ -350,7 +327,6 @@ class Configuration (dict):
finally:
self.robotsTxtCacheLock.release()
-
def log_newUrl (self, url):
self.logLock.acquire()
try:
@@ -361,25 +337,23 @@ class Configuration (dict):
finally:
self.logLock.release()
-
def storeCookies (self, headers, host):
self.dataLock.acquire()
try:
output = []
for h in headers.getallmatchingheaders("Set-Cookie"):
output.append(h)
- debug(BRING_IT_ON, "Store Cookie", h)
+ bk.log.debug(linkcheck.LOG_CHECK, "Store Cookie", h)
c = self.cookies.setdefault(host, Cookie.SimpleCookie())
c.load(h)
return output
finally:
self.dataLock.release()
-
def getCookies (self, host, path):
self.dataLock.acquire()
try:
- debug(BRING_IT_ON, "Get Cookie", host, path)
+ bk.log.debug(linkcheck.LOG_CHECK, "Get Cookie", host, path)
if not self.cookies.has_key(host):
return []
cookievals = []
@@ -391,7 +365,6 @@ class Configuration (dict):
finally:
self.dataLock.release()
-
def read (self, files = []):
cfiles = files[:]
if not cfiles:
@@ -402,86 +375,107 @@ class Configuration (dict):
cfiles.append(norm("~/.linkcheckerrc"))
self.readConfig(cfiles)
-
def readConfig (self, files):
"""this big function reads all the configuration parameters
used in the linkchecker module."""
- debug(BRING_IT_ON, "reading configuration from", files)
- from linkcheck.log import Loggers
+ bk.log.debug(linkcheck.LOG_CHECK, "reading configuration from", files)
try:
cfgparser = ConfigParser.ConfigParser()
cfgparser.read(files)
except ConfigParser.Error, msg:
- debug(BRING_IT_ON, msg)
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
return
section="output"
- for key in Loggers.keys():
+ for key in linkcheck.Loggers.keys():
if cfgparser.has_section(key):
for opt in cfgparser.options(key):
try:
self[key][opt] = cfgparser.get(key, opt)
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self[key]['fields'] = [f.strip() for f in cfgparser.get(key, 'fields').split(',')]
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
log = cfgparser.get(section, "log")
- if Loggers.has_key(log):
+ if linkcheck.Loggers.has_key(log):
self['log'] = self.newLogger(log)
else:
warn(i18n._("invalid log option '%s'") % log)
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
if cfgparser.getboolean(section, "verbose"):
self["verbose"] = True
self["warnings"] = True
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
- try: self["quiet"] = cfgparser.getboolean(section, "quiet")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
- try: self["status"] = cfgparser.getboolean(section, "status")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
- try: self["warnings"] = cfgparser.getboolean(section, "warnings")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["quiet"] = cfgparser.getboolean(section, "quiet")
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["status"] = cfgparser.getboolean(section, "status")
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["warnings"] = cfgparser.getboolean(section, "warnings")
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
filelist = cfgparser.get(section, "fileoutput").split(",")
for arg in filelist:
arg = arg.strip()
# no file output for the blacklist and none Logger
- if Loggers.has_key(arg) and arg not in ["blacklist", "none"]:
+ if linkcheck.Loggers.has_key(arg) and arg not in ["blacklist", "none"]:
self['fileoutput'].append(
self.newLogger(arg, {'fileoutput':1}))
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
section="checking"
try:
num = cfgparser.getint(section, "threads")
self.setThreads(num)
- except ConfigParser.Error: debug(NIGHTMARE, msg)
- try: self["anchors"] = cfgparser.getboolean(section, "anchors")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+        except ConfigParser.Error, msg:
+            bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["anchors"] = cfgparser.getboolean(section, "anchors")
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
num = cfgparser.getint(section, "recursionlevel")
self["recursionlevel"] = num
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
- try: self["strict"] = cfgparser.getboolean(section, "strict")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["strict"] = cfgparser.getboolean(section, "strict")
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
wr = cfgparser.get(section, "warningregex")
if wr:
self["warningregex"] = re.compile(wr)
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
- try: self["warnsizebytes"] = int(cfgparser.get(section, "warnsizebytes"))
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["warnsizebytes"] = int(cfgparser.get(section, "warnsizebytes"))
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["nntpserver"] = cfgparser.get(section, "nntpserver")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["interactive"] = cfgparser.getboolean(section, "interactive")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["anchorcaching"] = cfgparser.getboolean(section, "anchorcaching")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
section = "authentication"
try:
@@ -494,7 +488,8 @@ class Configuration (dict):
'user': auth[1],
'password': auth[2]})
i += 1
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
section = "filtering"
try:
@@ -506,8 +501,13 @@ class Configuration (dict):
break
self["externlinks"].append(linkcheck.getLinkPat(ctuple[0], strict=int(ctuple[1])))
i += 1
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
- try: self["internlinks"].append(linkcheck.getLinkPat(cfgparser.get(section, "internlinks")))
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
- try: self["denyallow"] = cfgparser.getboolean(section, "denyallow")
- except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["internlinks"].append(linkcheck.getLinkPat(cfgparser.get(section, "internlinks")))
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
+ try:
+ self["denyallow"] = cfgparser.getboolean(section, "denyallow")
+ except ConfigParser.Error, msg:
+ bk.log.debug(linkcheck.LOG_CHECK, msg)
diff --git a/linkcheck/DNS/__init__.py b/linkcheck/DNS/__init__.py
index 288ee869..883c3b0e 100644
--- a/linkcheck/DNS/__init__.py
+++ b/linkcheck/DNS/__init__.py
@@ -11,57 +11,11 @@
__version__ = '2.3.0'
-import Type, Opcode, Status, Class
-from Base import DnsRequest, DNSError
-from Lib import DnsResult
-Error=DNSError
-from lazy import *
-Request = DnsRequest
-Result = DnsResult
+import Base
+import Lib
-import linkcheck.DNS.Base
-linkcheck.DNS.Base.DiscoverNameServers()
+Error = Base.DNSError
+Request = Base.DnsRequest
+Result = Lib.DnsResult
-#
-# $Log$
-# Revision 1.8 2004/07/07 18:01:59 calvin
-# new module layout
-#
-# Revision 1.7 2003/07/04 14:23:22 calvin
-# add coding line
-#
-# Revision 1.6 2003/01/05 17:52:53 calvin
-# fix
-#
-# Revision 1.5 2003/01/05 17:39:19 calvin
-# pychecker fixes
-#
-# Revision 1.4 2002/11/26 23:27:43 calvin
-# update to Python >= 2.2.1
-#
-# Revision 1.8 2002/05/06 06:17:49 anthonybaxter
-# found that the old README file called itself release 2.2. So make
-# this one 2.3...
-#
-# Revision 1.7 2002/05/06 06:16:15 anthonybaxter
-# make some sort of reasonable version string. releasewards ho!
-#
-# Revision 1.6 2002/03/19 13:05:02 anthonybaxter
-# converted to class based exceptions (there goes the python1.4 compatibility :)
-#
-# removed a quite gross use of 'eval()'.
-#
-# Revision 1.5 2002/03/19 12:41:33 anthonybaxter
-# tabnannied and reindented everything. 4 space indent, no tabs.
-# yay.
-#
-# Revision 1.4 2001/11/26 17:57:51 stroeder
-# Added __version__
-#
-# Revision 1.3 2001/08/09 09:08:55 anthonybaxter
-# added identifying header to top of each file
-#
-# Revision 1.2 2001/07/19 06:57:07 anthony
-# cvs keywords added
-#
-#
+Base.DiscoverNameServers()
diff --git a/linkcheck/StringUtil.py b/linkcheck/StringUtil.py
index 84a2015e..f096efb4 100644
--- a/linkcheck/StringUtil.py
+++ b/linkcheck/StringUtil.py
@@ -86,20 +86,6 @@ def getLastWordBoundary (s, width):
return width-1
-def applyTable (table, s):
- "apply a table of replacement pairs to str"
- for mapping in table:
- s = s.replace(mapping[0], mapping[1])
- return s
-
-
-def sqlify (s):
- "Escape special SQL chars and strings"
- if not s:
- return "NULL"
- return "'%s'"%applyTable(SQLTable, s)
-
-
def htmlify (s):
"Escape special HTML chars and strings"
return applyTable(HtmlTable, s)
diff --git a/linkcheck/XmlUtils.py b/linkcheck/XmlUtils.py
deleted file mode 100644
index f4700562..00000000
--- a/linkcheck/XmlUtils.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""XML utility functions"""
-# Copyright (C) 2003-2004 Bastian Kleineidam
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-__version__ = "$Revision$"[11:-2]
-__date__ = "$Date$"[7:-2]
-
-import xml.sax.saxutils
-
-xmlattr_entities = {
- "&": "&",
- "<": "<",
- ">": ">",
- "\"": """,
-}
-
-
-def xmlquote (s):
- """quote characters for XML"""
- return xml.sax.saxutils.escape(s)
-
-
-def xmlquoteattr (s):
- """quote XML attribute, ready for inclusion with double quotes"""
- return xml.sax.saxutils.escape(s, xmlattr_entities)
-
-
-def xmlunquote (s):
- """unquote characters from XML"""
- return xml.sax.saxutils.unescape(s)
-
-
-def xmlunquoteattr (s):
- """unquote attributes from XML"""
- return xml.sax.saxutils.unescape(s, xmlattr_entities)
diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py
index 0cebd021..ac7903b9 100644
--- a/linkcheck/__init__.py
+++ b/linkcheck/__init__.py
@@ -16,9 +16,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-import re
import sys
-import urlparse
+import re
+import time
+import linkcheck.i18n
# logger areas
@@ -32,9 +33,36 @@ class LinkCheckerError (Exception):
pass
+def strtime (t):
+    """return local time t as 'YYYY-MM-DD HH:MM:SS' followed by strtimezone()"""
+    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
+    return stamp + strtimezone()
+
+
+def strduration (duration):
+    """return duration (given in seconds) as ' N.NNN unit' string"""
+    name = linkcheck.i18n._("seconds")
+    if duration > 60:
+        duration = duration / 60.0  # float division: int input must not truncate
+        name = linkcheck.i18n._("minutes")
+    if duration > 60:
+        duration = duration / 60.0
+        name = linkcheck.i18n._("hours")
+    return " %.3f %s"%(duration, name)
+
+
+def strtimezone ():
+    """return local UTC offset as +HHMM/-HHMM (portable stand-in for %z)"""
+    if time.daylight and time.localtime().tm_isdst > 0:
+        zone = time.altzone  # DST is in effect right now, not merely defined
+    else:
+        zone = time.timezone
+    return "%s%02d%02d" % ((zone > 0 and "-" or "+",) + divmod(abs(zone)//60, 60))
+
+
def getLinkPat (arg, strict=False):
"""get a link pattern matcher for intern/extern links"""
- linkcheck.log.debug(LOG_CHECK, "Link pattern %r", arg)
+ bk.log.debug(LOG_CHECK, "Link pattern %r", arg)
if arg[0:1] == '!':
pattern = arg[1:]
negate = True
@@ -48,48 +76,37 @@ def getLinkPat (arg, strict=False):
}
-# file extensions we can parse recursively
-extensions = {
- "html": re.compile(r'(?i)\.s?html?$'),
- "opera": re.compile(r'^(?i)opera.adr$'), # opera bookmark file
- "css": re.compile(r'(?i)\.css$'), # CSS stylesheet
-# "text": re.compile(r'(?i)\.(txt|xml|tsv|csv|sgml?|py|java|cc?|cpp|h)$'),
-}
-
-
-import linkcheck.FileUrlData
-import linkcheck.IgnoredUrlData
-import linkcheck.FtpUrlData
-import linkcheck.GopherUrlData
-import linkcheck.HttpUrlData
-import linkcheck.HttpsUrlData
-import linkcheck.MailtoUrlData
-import linkcheck.TelnetUrlData
-import linkcheck.NntpUrlData
-
-def set_intern_url (url, klass, config):
- """Precondition: config['strict'] is true (ie strict checking) and
- recursion level is zero (ie url given on the command line)"""
- if klass == linkcheck.FileUrlData.FileUrlData:
- linkcheck.log.debug(LOG_CHECK, "Add intern pattern ^file:")
- config['internlinks'].append(getLinkPat("^file:"))
- elif klass in [linkcheck.HttpUrlData.HttpUrlData,
- linkcheck.HttpsUrlData.HttpsUrlData,
- linkcheck.FtpUrlData.FtpUrlData]:
- domain = urlparse.urlsplit(url)[1]
- if domain:
- domain = "://%s"%re.escape(domain)
- debug(BRING_IT_ON, "Add intern domain", domain)
- # add scheme colon to link pattern
- config['internlinks'].append(getLinkPat(domain))
-
-
-import linkcheck.logger
-
def printStatus (config, curtime, start_time):
tocheck = len(config.urls)
links = config['linknumber']
active = config.threader.active_threads()
- duration = linkcheck.logger.strduration(curtime - start_time)
+ duration = strduration(curtime - start_time)
print >>sys.stderr, linkcheck.i18n._("%5d urls queued, %4d links checked, %2d active threads, runtime %s")%\
(tocheck, links, active, duration)
+
+
+import linkcheck.logger.StandardLogger
+import linkcheck.logger.HtmlLogger
+import linkcheck.logger.ColoredLogger
+import linkcheck.logger.GMLLogger
+import linkcheck.logger.SQLLogger
+import linkcheck.logger.CSVLogger
+import linkcheck.logger.BlacklistLogger
+import linkcheck.logger.XMLLogger
+import linkcheck.logger.NoneLogger
+
+
+# default logger classes
+Loggers = {
+ "text": linkcheck.logger.StandardLogger.StandardLogger,
+ "html": linkcheck.logger.HtmlLogger.HtmlLogger,
+ "colored": linkcheck.logger.ColoredLogger.ColoredLogger,
+ "gml": linkcheck.logger.GMLLogger.GMLLogger,
+ "sql": linkcheck.logger.SQLLogger.SQLLogger,
+ "csv": linkcheck.logger.CSVLogger.CSVLogger,
+ "blacklist": linkcheck.logger.BlacklistLogger.BlacklistLogger,
+ "xml": linkcheck.logger.XMLLogger.XMLLogger,
+ "none": linkcheck.logger.NoneLogger.NoneLogger,
+}
+# for easy printing: a comma separated logger list
+LoggerKeys = ", ".join(Loggers.keys())
diff --git a/linkcheck/checker/FileUrlData.py b/linkcheck/checker/FileUrlData.py
index 28cbd77f..7299e369 100644
--- a/linkcheck/checker/FileUrlData.py
+++ b/linkcheck/checker/FileUrlData.py
@@ -19,10 +19,10 @@
import re
import os
import urlparse
-import linkcheck.UrlData
+import linkcheck.checker
# OSError is thrown on Windows when a file is not found
-linkcheck.UrlData.ExcList.append(OSError)
+linkcheck.checker.ExcList.append(OSError)
# if file extension was fruitless, look at the content
contents = {
diff --git a/linkcheck/checker/UrlData.py b/linkcheck/checker/UrlData.py
index eabe5cca..f902900b 100644
--- a/linkcheck/checker/UrlData.py
+++ b/linkcheck/checker/UrlData.py
@@ -78,18 +78,6 @@ def get_absolute_url (urlName, baseRef, parentName):
return ""
-# we catch these exceptions, all other exceptions are internal
-# or system errors
-ExcList = [
- IOError,
- ValueError, # from httplib.py
- linkcheck.LinkCheckerError,
- linkcheck.DNS.Error,
- socket.timeout,
- socket.error,
- select.error,
-]
-
if hasattr(socket, "sslerror"):
ExcList.append(socket.sslerror)
@@ -226,15 +214,13 @@ class UrlData (object):
def check (self):
try:
self._check()
- except KeyboardInterrupt:
- raise
except (socket.error, select.error):
# on Unix, ctrl-c can raise
# error: (4, 'Interrupted system call')
etype, value = sys.exc_info()[:2]
if etype!=4:
raise
- except linkcheck.test_support.Error:
+ except (KeyboardInterrupt, linkcheck.test_support.Error):
raise
except:
internal_error()
diff --git a/linkcheck/checker/__init__.py b/linkcheck/checker/__init__.py
index 5de372a4..68ced2e3 100644
--- a/linkcheck/checker/__init__.py
+++ b/linkcheck/checker/__init__.py
@@ -17,6 +17,25 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
+import socket
+import select
+import re
+import urlparse
+import linkcheck
+import linkcheck.DNS
+
+
+# we catch these exceptions, all other exceptions are internal
+# or system errors
+ExcList = [
+ IOError,
+ ValueError, # from httplib.py
+ linkcheck.LinkCheckerError,
+ linkcheck.DNS.Error,
+ socket.timeout,
+ socket.error,
+ select.error,
+]
# main check function
@@ -66,6 +85,32 @@ import linkcheck.checker.MailtoUrlData
import linkcheck.checker.TelnetUrlData
import linkcheck.checker.NntpUrlData
+# file extensions we can parse recursively
+extensions = {
+    "html": re.compile(r'(?i)\.s?html?$'),
+    "opera": re.compile(r'(?i)^opera\.adr$'), # opera bookmark file
+    "css": re.compile(r'(?i)\.css$'), # CSS stylesheet
+#    "text": re.compile(r'(?i)\.(txt|xml|tsv|csv|sgml?|py|java|cc?|cpp|h)$'),
+}
+
+
+def set_intern_url (url, klass, config):
+    """Add an intern link pattern for url. Precondition: config['strict'] is
+    true and recursion level is zero (ie url given on the command line)"""
+    if klass == linkcheck.checker.FileUrlData.FileUrlData:
+        linkcheck.log.debug(linkcheck.LOG_CHECK, "Add intern pattern ^file:")
+        config['internlinks'].append(linkcheck.getLinkPat("^file:"))
+    elif klass in [linkcheck.checker.HttpUrlData.HttpUrlData,
+                   linkcheck.checker.HttpsUrlData.HttpsUrlData,
+                   linkcheck.checker.FtpUrlData.FtpUrlData]:
+        domain = urlparse.urlsplit(url)[1]
+        if domain:
+            domain = "://%s"%re.escape(domain)
+            linkcheck.log.debug(linkcheck.LOG_CHECK, "Add intern domain", domain)
+            # add scheme colon to link pattern
+            config['internlinks'].append(linkcheck.getLinkPat(domain))
+
+
def getUrlDataFrom (urlName, recursionLevel, config, parentName=None,
baseRef=None, line=0, column=0, name=None,
cmdline=None):
diff --git a/linkcheck/containers.py b/linkcheck/containers.py
deleted file mode 100644
index 1562409b..00000000
--- a/linkcheck/containers.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""special container classes"""
-# Copyright (C) 2004 Bastian Kleineidam
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-__version__ = "$Revision$"[11:-2]
-__date__ = "$Date$"[7:-2]
-
-
-class SetList (list):
- """a list that eliminates all duplicates
- """
-
- def append (self, x):
- """append only if not already there"""
- if x not in self:
- super(SetList, self).append(x)
-
- def extend (self, x):
- """extend while eliminating duplicates by appending item for item"""
- for i in x:
- self.append(i)
-
- def insert (self, i, x):
- """insert only if not already there"""
- if x not in self:
- super(SetList, self).insert(i, x)
-
- def __setitem__ (self, key, value):
- """set new value, and eliminate old duplicates (if any)"""
- oldvalues = []
- for i in range(len(self)):
- if self[i]==value:
- oldvalues.append(i)
- super(SetList, self).__setitem__(key, value)
- # remove old duplicates (from last to first)
- oldvalues.reverse()
- for i in oldvalues:
- if i!=key:
- del self[key]
-
-
-class ListDict (dict):
- """a dictionary whose iterators reflect the order in which elements
- were added
- """
-
- def __init__ (self):
- """initialize sorted key list"""
- # sorted list of keys
- self._keys = []
-
- def __setitem__ (self, key, value):
- """add key,value to dict, append key to sorted list"""
- if not self.has_key(key):
- self._keys.append(key)
- super(ListDict, self).__setitem__(key, value)
-
- def __delitem__ (self, key):
- """remove key from dict"""
- self._keys.remove(key)
- super(ListDict, self).__delitem__(key)
-
- def values (self):
- """return sorted list of values"""
- return [self[k] for k in self._keys]
-
- def items (self):
- """return sorted list of items"""
- return [(k, self[k]) for k in self._keys]
-
- def keys (self):
- """return sorted list of keys"""
- return self._keys[:]
-
- def itervalues (self):
- """return iterator over sorted values"""
- return iter(self.values())
-
- def iteritems (self):
- """return iterator over sorted items"""
- return iter(self.items())
-
- def iterkeys (self):
- """return iterator over sorted keys"""
- return iter(self.keys())
-
- def clear (self):
- """remove all dict entires"""
- self._keys = []
- super(ListDict, self).clear()
-
-
-class LRU (object):
- """
- Implementation of a length-limited O(1) LRU queue.
- Built for and used by PyPE:
- http://pype.sourceforge.net
- Copyright 2003 Josiah Carlson. (Licensed under the GPL)
- """
- class Node (object):
- def __init__ (self, prev, me):
- self.prev = prev
- self.me = me
- self.next = None
-
- def __init__ (self, count, pairs=[]):
- self.count = max(count, 1)
- self.d = {}
- self.first = None
- self.last = None
- for key, value in pairs:
- self[key] = value
-
- def __contains__ (self, obj):
- return obj in self.d
-
- def has_key (self, obj):
- return self.d.has_key(obj)
-
- def __getitem__ (self, obj):
- a = self.d[obj].me
- self[a[0]] = a[1]
- return a[1]
-
- def __setitem__ (self, obj, val):
- if obj in self.d:
- del self[obj]
- nobj = self.Node(self.last, (obj, val))
- if self.first is None:
- self.first = nobj
- if self.last:
- self.last.next = nobj
- self.last = nobj
- self.d[obj] = nobj
- if len(self.d) > self.count:
- if self.first == self.last:
- self.first = None
- self.last = None
- return
- a = self.first
- a.next.prev = None
- self.first = a.next
- a.next = None
- del self.d[a.me[0]]
- del a
-
- def __delitem__ (self, obj):
- nobj = self.d[obj]
- if nobj.prev:
- nobj.prev.next = nobj.next
- else:
- self.first = nobj.next
- if nobj.next:
- nobj.next.prev = nobj.prev
- else:
- self.last = nobj.prev
- del self.d[obj]
-
- def __iter__ (self):
- cur = self.first
- while cur != None:
- cur2 = cur.next
- yield cur.me[1]
- cur = cur2
-
- def iteritems (self):
- cur = self.first
- while cur != None:
- cur2 = cur.next
- yield cur.me
- cur = cur2
-
- def iterkeys (self):
- return iter(self.d)
-
- def itervalues (self):
- for i,j in self.iteritems():
- yield j
-
- def keys (self):
- return self.d.keys()
-
- def setdefault (self, key, failobj=None):
- if not self.has_key(key):
- self[key] = failobj
- return self[key]
diff --git a/linkcheck/i18n.py b/linkcheck/i18n.py
deleted file mode 100644
index f5e324d8..00000000
--- a/linkcheck/i18n.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""internationalization support"""
-# Copyright (C) 2000-2004 Bastian Kleineidam
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-# i18n suppport
-import os
-from _linkchecker_configdata import install_data
-
-def init_gettext ():
- global _
- try:
- import gettext
- domain = 'linkcheck'
- localedir = os.path.join(install_data, 'share', 'locale')
- _ = gettext.translation(domain, localedir).gettext
- except (IOError, ImportError):
- # default gettext function
- _ = lambda s: s
-
-init_gettext()
diff --git a/linkcheck/logger/BlacklistLogger.py b/linkcheck/logger/BlacklistLogger.py
index b2c05310..21f3d668 100644
--- a/linkcheck/logger/BlacklistLogger.py
+++ b/linkcheck/logger/BlacklistLogger.py
@@ -17,10 +17,10 @@
import sys
import os
-import linkcheck
+import linkcheck.logger.Logger
-class BlacklistLogger (linkcheck.logger.Logger):
+class BlacklistLogger (linkcheck.logger.Logger.Logger):
"""Updates a blacklist of wrong links. If a link on the blacklist
is working (again), it is removed from the list. So after n days
we have only links on the list which failed for n days.
diff --git a/linkcheck/logger/CSVLogger.py b/linkcheck/logger/CSVLogger.py
index 1679ac07..4608ea7b 100644
--- a/linkcheck/logger/CSVLogger.py
+++ b/linkcheck/logger/CSVLogger.py
@@ -17,7 +17,9 @@
import time
import csv
-import linkcheck
+import linkcheck.i18n
+import linkcheck.logger.StandardLogger
+import linkcheck.logger.Logger
class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
@@ -30,7 +32,7 @@ class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.lineterminator = "\n"
def init (self):
- linkcheck.logger.Logger.init(self)
+ linkcheck.logger.Logger.Logger.init(self)
if self.fd is None:
return
self.starttime = time.time()
diff --git a/linkcheck/logger/ColoredLogger.py b/linkcheck/logger/ColoredLogger.py
index 288ae5d7..51e7808f 100644
--- a/linkcheck/logger/ColoredLogger.py
+++ b/linkcheck/logger/ColoredLogger.py
@@ -15,10 +15,12 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-import linkcheck
+import linkcheck.i18n
+import linkcheck.AnsiColor
+import linkcheck.logger.StandardLogger
-class ColoredLogger (linkcheck.logger.StandardLogger):
+class ColoredLogger (linkcheck.logger.StandardLogger.StandardLogger):
"""ANSI colorized output"""
def __init__ (self, **args):
diff --git a/linkcheck/logger/NoneLogger.py b/linkcheck/logger/NoneLogger.py
index 74d4b47c..f066aa4b 100644
--- a/linkcheck/logger/NoneLogger.py
+++ b/linkcheck/logger/NoneLogger.py
@@ -15,7 +15,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-import linkcheck
+import linkcheck.logger.Logger
class NoneLogger (linkcheck.logger.Logger.Logger):
diff --git a/linkcheck/logger/SQLLogger.py b/linkcheck/logger/SQLLogger.py
index 3e5d0fb3..b8aced22 100644
--- a/linkcheck/logger/SQLLogger.py
+++ b/linkcheck/logger/SQLLogger.py
@@ -17,6 +17,23 @@
import time
import linkcheck
+import linkcheck.i18n
+import linkcheck.logger.StandardLogger
+import linkcheck.logger.Logger
+
+
+def applyTable (table, s):
+    "Return s with pair[0] replaced by pair[1] for every pair in table, in order."
+    for pair in table:
+        s = s.replace(pair[0], pair[1])
+    return s
+
+
+def sqlify (s):
+    "Return s as a quoted SQL string literal (' doubled), or NULL if s is empty"
+    if not s:
+        return "NULL"
+    return "'%s'" % s.replace("'", "''")  # inlined: no SQLTable exists in this module
class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
@@ -33,7 +50,7 @@ class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.starttime = time.time()
if self.has_field("intro"):
self.fd.write("-- "+(linkcheck.i18n._("created by %s at %s\n") % (linkcheck.Config.AppName,
- linkcheck.logger.strtime(self.starttime))))
+ linkcheck.strtime(self.starttime))))
self.fd.write("-- "+(linkcheck.i18n._("Get the newest version at %s\n") % linkcheck.Config.Url))
self.fd.write("-- "+(linkcheck.i18n._("Write comments and bugs to %s\n\n") % \
linkcheck.Config.Email))
@@ -47,19 +64,19 @@ class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
" values "
"(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%s,%d,%d,%d,%d)%s\n" % \
(self.dbname,
- linkcheck.StringUtil.sqlify(urlData.urlName),
+ sqlify(urlData.urlName),
urlData.recursionLevel,
- linkcheck.StringUtil.sqlify(linkcheck.url.url_quote(urlData.parentName or "")),
- linkcheck.StringUtil.sqlify(urlData.baseRef),
- linkcheck.StringUtil.sqlify(urlData.errorString),
- linkcheck.StringUtil.sqlify(urlData.validString),
- linkcheck.StringUtil.sqlify(urlData.warningString),
- linkcheck.StringUtil.sqlify(urlData.infoString),
+ sqlify(linkcheck.url.url_quote(urlData.parentName or "")),
+ sqlify(urlData.baseRef),
+ sqlify(urlData.errorString),
+ sqlify(urlData.validString),
+ sqlify(urlData.warningString),
+ sqlify(urlData.infoString),
urlData.valid,
- linkcheck.StringUtil.sqlify(linkcheck.url.url_quote(urlData.url)),
+ sqlify(linkcheck.url.url_quote(urlData.url)),
urlData.line,
urlData.column,
- linkcheck.StringUtil.sqlify(urlData.name),
+ sqlify(urlData.name),
urlData.checktime,
urlData.dltime,
urlData.dlsize,
diff --git a/linkcheck/logger/XMLLogger.py b/linkcheck/logger/XMLLogger.py
index 3949a814..289d20a4 100644
--- a/linkcheck/logger/XMLLogger.py
+++ b/linkcheck/logger/XMLLogger.py
@@ -16,9 +16,38 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
+import xml.sax.saxutils
import linkcheck
+# Extra entities for attribute values, passed to xml.sax.saxutils.escape().
+# Only the double quote is listed: escape() already rewrites &, < and >,
+# and repeating them here would escape the ampersands it just produced.
+xmlattr_entities = {
+    "\"": "&quot;",
+}
+
+
+def xmlquote (s):
+ """Escape s for XML character data via xml.sax.saxutils.escape (&, < and >)."""
+ return xml.sax.saxutils.escape(s)
+
+
+def xmlquoteattr (s):
+    """Escape s for use inside a double-quoted XML attribute value."""
+    return xml.sax.saxutils.escape(s, {'"': "&quot;"})  # escape() does &, <, > itself
+
+
+def xmlunquote (s):
+ """Reverse XML escaping of s via xml.sax.saxutils.unescape (&amp;, &lt;, &gt;)."""
+ return xml.sax.saxutils.unescape(s)
+
+
+def xmlunquoteattr (s):
+    """Undo attribute quoting: turn &quot;, &lt;, &gt; and &amp; in s back into characters."""
+    return xml.sax.saxutils.unescape(s, {"&quot;": '"'})  # unescape() maps key -> value
+
+
class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
"""XML output mirroring the GML structure. Easy to parse with any XML
tool."""
@@ -56,7 +85,7 @@ class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.fd.write(">\n")
if self.has_field("realurl"):
self.fd.write(" \n" %\
- linkcheck.XmlUtils.xmlquote(linkcheck.url.url_quote(node.url)))
+ xmlquote(linkcheck.url.url_quote(node.url)))
self.fd.write(" \n")
if node.dltime>=0 and self.has_field("dltime"):
self.fd.write(" %f\n" % node.dltime)
@@ -85,7 +114,7 @@ class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.fd.write(">\n")
if self.has_field("url"):
self.fd.write(" \n" % \
- linkcheck.XmlUtils.linkcheck.xmlquote(node.urlName))
+ xmlquote(node.urlName))
self.fd.write(" \n")
if self.has_field("result"):
self.fd.write(" %d\n" % \
diff --git a/linkcheck/logger/__init__.py b/linkcheck/logger/__init__.py
index 0fa3e900..b38ca970 100644
--- a/linkcheck/logger/__init__.py
+++ b/linkcheck/logger/__init__.py
@@ -16,60 +16,3 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-import time
-import linkcheck
-import linkcheck.i18n
-
-
-def strtime (t):
- """return ISO 8601 formatted time"""
- return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t)) + \
- strtimezone()
-
-
-def strduration (duration):
- """return string formatted time duration"""
- name = linkcheck.i18n._("seconds")
- if duration > 60:
- duration = duration / 60
- name = linkcheck.i18n._("minutes")
- if duration > 60:
- duration = duration / 60
- name = linkcheck.i18n._("hours")
- return " %.3f %s"%(duration, name)
-
-
-def strtimezone ():
- """return timezone info, %z on some platforms, but not supported on all"""
- if time.daylight:
- zone = time.altzone
- else:
- zone = time.timezone
- return "%+04d" % int(-zone/3600)
-
-
-import linkcheck.logger.StandardLogger
-import linkcheck.logger.HtmlLogger
-import linkcheck.logger.ColoredLogger
-import linkcheck.logger.GMLLogger
-import linkcheck.logger.SQLLogger
-import linkcheck.logger.CSVLogger
-import linkcheck.logger.BlacklistLogger
-import linkcheck.logger.XMLLogger
-import linkcheck.logger.NoneLogger
-
-
-# default logger classes
-Loggers = {
- "text": linkcheck.logger.StandardLogger.StandardLogger,
- "html": linkcheck.logger.HtmlLogger.HtmlLogger,
- "colored": linkcheck.logger.ColoredLogger.ColoredLogger,
- "gml": linkcheck.logger.GMLLogger.GMLLogger,
- "sql": linkcheck.logger.SQLLogger.SQLLogger,
- "csv": linkcheck.logger.CSVLogger.CSVLogger,
- "blacklist": linkcheck.logger.BlacklistLogger.BlacklistLogger,
- "xml": linkcheck.logger.XMLLogger.XMLLogger,
- "none": linkcheck.logger.NoneLogger.NoneLogger,
-}
-# for easy printing: a comma separated logger list
-LoggerKeys = ", ".join(Loggers.keys())