more import fixes

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1364 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-07-19 08:58:59 +00:00
parent 2d3e8a2b9b
commit 6476c8675d
18 changed files with 258 additions and 643 deletions

View file

@ -1,83 +0,0 @@
# -*- coding: iso-8859-1 -*-
"""ANSI Color definitions and functions"""
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import os
import sys
# Escape for ANSI colors
# Escape sequence template: %s receives one or more semicolon-separated
# SGR numeric codes (e.g. "1;31" for bold red).
AnsiEsc = "\x1b[%sm"
# type numbers
# Text attribute (SGR) codes, keyed by human-readable name.
AnsiType = {
    'bold': '1',
    'light': '2',
    'blink': '5',
    'invert': '7',
}
# color numbers (the capitalized colors are bright)
# NOTE(review): codes 30-37 are the standard ANSI foreground colors;
# 40-47 are defined by ANSI as *background* colors, not "bright" --
# the original comment may be inaccurate; confirm against callers.
AnsiColor = {
    'default': '0',
    'black': '30',
    'red': '31',
    'green': '32',
    'yellow': '33',
    'blue': '34',
    'purple': '35',
    'cyan': '36',
    'white': '37',
    'Black': '40',
    'Red': '41',
    'Green': '42',
    'Yellow': '43',
    'Blue': '44',
    'Purple': '45',
    'Cyan': '46',
    'White': '47',
}
def esc_ansicolor (color):
"""convert a named color definition to an escaped ANSI color"""
ctype = ''
if ";" in color:
ctype, color = color.split(";", 1)
if not AnsiType.has_key(ctype):
print >>sys.stderr, "invalid ANSI color type", repr(ctype)
print >>sys.stderr, "valid values are", AnsiType.keys()
ctype = ''
else:
ctype = AnsiType[ctype]+";"
if not AnsiColor.has_key(color):
print >>sys.stderr, "invalid ANSI color name", repr(color)
print >>sys.stderr, "valid values are", AnsiColor.keys()
cnum = '0'
else:
cnum = AnsiColor[color]
return AnsiEsc % (ctype+cnum)
AnsiReset = esc_ansicolor("default")
def colorize (text, color=None):
"return text colorized if TERM is set"
if (color is not None) and os.environ.get('TERM'):
color = esc_ansicolor(color)
return '%s%s%s' % (color, text, AnsiReset)
else:
return text

View file

@ -24,10 +24,11 @@ import Cookie
import sets
import urllib
import _linkchecker_configdata
import bk
import bk.containers
import linkcheck
import linkcheck.i18n
import linkcheck.log
import linkcheck.Threader
try:
import threading
except ImportError:
@ -69,7 +70,7 @@ def _check_morsel (m, host, path):
return None
# check expiry date (if its stored)
if m["expires"]:
debug(BRING_IT_ON, "Cookie expires", m["expires"])
bk.log.debug(linkcheck.LOG_CHECK, "Cookie expires", m["expires"])
# XXX
return m.output(header='').strip()
@ -90,7 +91,6 @@ class Configuration (dict):
# reduceThreads(). Ok, this is a hack but ItWorksForMe(tm).
self.reduceCount = 0
def reset (self):
"""Reset to default values"""
self['linknumber'] = 0
@ -174,55 +174,47 @@ class Configuration (dict):
self.setThreads(10)
self.urlSeen = sets.Set()
self.urlSeenLock = threading.Lock()
self.urlCache = linkcheck.containers.LRU(MAX_URL_CACHE)
self.urlCache = bk.containers.LRU(MAX_URL_CACHE)
self.urlCacheLock = threading.Lock()
self.robotsTxtCache = linkcheck.containers.LRU(MAX_ROBOTS_TXT_CACHE)
self.robotsTxtCache = bk.containers.LRU(MAX_ROBOTS_TXT_CACHE)
self.robotsTxtCacheLock = threading.Lock()
self.urls = []
self.urlCounter = 0
self.urlsLock = threading.Lock()
# basic data lock (eg for cookies, link numbers etc.)
self.dataLock = threading.Lock()
self.cookies = linkcheck.containers.LRU(MAX_COOKIES_CACHE)
self.cookies = bk.containers.LRU(MAX_COOKIES_CACHE)
def setThreads (self, num):
debug(HURT_ME_PLENTY, "set threading with %d threads"%num)
bk.log.debug(linkcheck.LOG_CHECK, "set threading with %d threads"%num)
self.threader.threads_max = num
if num>0:
sys.setcheckinterval(50)
else:
sys.setcheckinterval(100)
def newLogger (self, logtype, dict={}):
args = {}
args.update(self[logtype])
args.update(dict)
from linkcheck.log import Loggers
return Loggers[logtype](**args)
return linkcheck.Loggers[logtype](**args)
def addLogger(self, logtype, loggerClass, logargs={}):
"add a new logger type"
from linkcheck.log import Loggers
Loggers[logtype] = loggerClass
linkcheck.Loggers[logtype] = loggerClass
self[logtype] = logargs
def log_init (self):
if not self["quiet"]: self["log"].init()
for log in self["fileoutput"]:
log.init()
def log_endOfOutput (self):
if not self["quiet"]:
self["log"].endOfOutput(linknumber=self['linknumber'])
for log in self["fileoutput"]:
log.endOfOutput(linknumber=self['linknumber'])
def incrementLinknumber (self):
try:
self.dataLock.acquire()
@ -230,19 +222,15 @@ class Configuration (dict):
finally:
self.dataLock.release()
def hasMoreUrls (self):
return self.urls
def finished (self):
return self.threader.finished() and not self.urls
def finish (self):
self.threader.finish()
def appendUrl (self, urlData):
self.urlsLock.acquire()
try:
@ -260,7 +248,6 @@ class Configuration (dict):
finally:
self.urlsLock.release()
def filterUrlQueue (self):
"""remove already cached urls from queue"""
# note: url lock must be acquired
@ -270,7 +257,6 @@ class Configuration (dict):
print >>sys.stderr, \
i18n._("removed %d cached urls from incoming queue")%removed
def getUrl (self):
"""get first url in queue and return it"""
self.urlsLock.acquire()
@ -281,11 +267,9 @@ class Configuration (dict):
finally:
self.urlsLock.release()
def checkUrl (self, url):
self.threader.start_thread(url.check, ())
def urlSeen_has_key (self, key):
self.urlSeenLock.acquire()
try:
@ -293,7 +277,6 @@ class Configuration (dict):
finally:
self.urlSeenLock.release()
def urlSeen_set (self, key):
self.urlSeenLock.acquire()
try:
@ -301,7 +284,6 @@ class Configuration (dict):
finally:
self.urlSeenLock.release()
def urlCache_has_key (self, key):
self.urlCacheLock.acquire()
try:
@ -309,7 +291,6 @@ class Configuration (dict):
finally:
self.urlCacheLock.release()
def urlCache_get (self, key):
self.urlCacheLock.acquire()
try:
@ -317,16 +298,14 @@ class Configuration (dict):
finally:
self.urlCacheLock.release()
def urlCache_set (self, key, val):
self.urlCacheLock.acquire()
try:
debug(NIGHTMARE, "caching", repr(key))
bk.log.debug(linkcheck.LOG_CHECK, "caching", repr(key))
self.urlCache[key] = val
finally:
self.urlCacheLock.release()
def robotsTxtCache_has_key (self, key):
self.robotsTxtCacheLock.acquire()
try:
@ -334,7 +313,6 @@ class Configuration (dict):
finally:
self.robotsTxtCacheLock.release()
def robotsTxtCache_get (self, key):
self.robotsTxtCacheLock.acquire()
try:
@ -342,7 +320,6 @@ class Configuration (dict):
finally:
self.robotsTxtCacheLock.release()
def robotsTxtCache_set (self, key, val):
self.robotsTxtCacheLock.acquire()
try:
@ -350,7 +327,6 @@ class Configuration (dict):
finally:
self.robotsTxtCacheLock.release()
def log_newUrl (self, url):
self.logLock.acquire()
try:
@ -361,25 +337,23 @@ class Configuration (dict):
finally:
self.logLock.release()
def storeCookies (self, headers, host):
self.dataLock.acquire()
try:
output = []
for h in headers.getallmatchingheaders("Set-Cookie"):
output.append(h)
debug(BRING_IT_ON, "Store Cookie", h)
bk.log.debug(linkcheck.LOG_CHECK, "Store Cookie", h)
c = self.cookies.setdefault(host, Cookie.SimpleCookie())
c.load(h)
return output
finally:
self.dataLock.release()
def getCookies (self, host, path):
self.dataLock.acquire()
try:
debug(BRING_IT_ON, "Get Cookie", host, path)
bk.log.debug(linkcheck.LOG_CHECK, "Get Cookie", host, path)
if not self.cookies.has_key(host):
return []
cookievals = []
@ -391,7 +365,6 @@ class Configuration (dict):
finally:
self.dataLock.release()
def read (self, files = []):
cfiles = files[:]
if not cfiles:
@ -402,86 +375,107 @@ class Configuration (dict):
cfiles.append(norm("~/.linkcheckerrc"))
self.readConfig(cfiles)
def readConfig (self, files):
"""this big function reads all the configuration parameters
used in the linkchecker module."""
debug(BRING_IT_ON, "reading configuration from", files)
from linkcheck.log import Loggers
bk.log.debug(linkcheck.LOG_CHECK, "reading configuration from", files)
try:
cfgparser = ConfigParser.ConfigParser()
cfgparser.read(files)
except ConfigParser.Error, msg:
debug(BRING_IT_ON, msg)
bk.log.debug(linkcheck.LOG_CHECK, msg)
return
section="output"
for key in Loggers.keys():
for key in linkcheck.Loggers.keys():
if cfgparser.has_section(key):
for opt in cfgparser.options(key):
try:
self[key][opt] = cfgparser.get(key, opt)
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self[key]['fields'] = [f.strip() for f in cfgparser.get(key, 'fields').split(',')]
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
log = cfgparser.get(section, "log")
if Loggers.has_key(log):
if linkcheck.Loggers.has_key(log):
self['log'] = self.newLogger(log)
else:
warn(i18n._("invalid log option '%s'") % log)
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
if cfgparser.getboolean(section, "verbose"):
self["verbose"] = True
self["warnings"] = True
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
try: self["quiet"] = cfgparser.getboolean(section, "quiet")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
try: self["status"] = cfgparser.getboolean(section, "status")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
try: self["warnings"] = cfgparser.getboolean(section, "warnings")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["quiet"] = cfgparser.getboolean(section, "quiet")
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["status"] = cfgparser.getboolean(section, "status")
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["warnings"] = cfgparser.getboolean(section, "warnings")
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
filelist = cfgparser.get(section, "fileoutput").split(",")
for arg in filelist:
arg = arg.strip()
# no file output for the blacklist and none Logger
if Loggers.has_key(arg) and arg not in ["blacklist", "none"]:
if linkcheck.Loggers.has_key(arg) and arg not in ["blacklist", "none"]:
self['fileoutput'].append(
self.newLogger(arg, {'fileoutput':1}))
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
section="checking"
try:
num = cfgparser.getint(section, "threads")
self.setThreads(num)
except ConfigParser.Error: debug(NIGHTMARE, msg)
try: self["anchors"] = cfgparser.getboolean(section, "anchors")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["anchors"] = cfgparser.getboolean(section, "anchors")
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
num = cfgparser.getint(section, "recursionlevel")
self["recursionlevel"] = num
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
try: self["strict"] = cfgparser.getboolean(section, "strict")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["strict"] = cfgparser.getboolean(section, "strict")
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
wr = cfgparser.get(section, "warningregex")
if wr:
self["warningregex"] = re.compile(wr)
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
try: self["warnsizebytes"] = int(cfgparser.get(section, "warnsizebytes"))
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["warnsizebytes"] = int(cfgparser.get(section, "warnsizebytes"))
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["nntpserver"] = cfgparser.get(section, "nntpserver")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["interactive"] = cfgparser.getboolean(section, "interactive")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["anchorcaching"] = cfgparser.getboolean(section, "anchorcaching")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
section = "authentication"
try:
@ -494,7 +488,8 @@ class Configuration (dict):
'user': auth[1],
'password': auth[2]})
i += 1
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
section = "filtering"
try:
@ -506,8 +501,13 @@ class Configuration (dict):
break
self["externlinks"].append(linkcheck.getLinkPat(ctuple[0], strict=int(ctuple[1])))
i += 1
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
try: self["internlinks"].append(linkcheck.getLinkPat(cfgparser.get(section, "internlinks")))
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
try: self["denyallow"] = cfgparser.getboolean(section, "denyallow")
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["internlinks"].append(linkcheck.getLinkPat(cfgparser.get(section, "internlinks")))
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)
try:
self["denyallow"] = cfgparser.getboolean(section, "denyallow")
except ConfigParser.Error, msg:
bk.log.debug(linkcheck.LOG_CHECK, msg)

View file

@ -11,57 +11,11 @@
__version__ = '2.3.0'
import Type, Opcode, Status, Class
from Base import DnsRequest, DNSError
from Lib import DnsResult
Error=DNSError
from lazy import *
Request = DnsRequest
Result = DnsResult
import Base
import Lib
import linkcheck.DNS.Base
linkcheck.DNS.Base.DiscoverNameServers()
Error = Base.DNSError
Request = Base.DnsRequest
Result = Lib.DnsResult
#
# $Log$
# Revision 1.8 2004/07/07 18:01:59 calvin
# new module layout
#
# Revision 1.7 2003/07/04 14:23:22 calvin
# add coding line
#
# Revision 1.6 2003/01/05 17:52:53 calvin
# fix
#
# Revision 1.5 2003/01/05 17:39:19 calvin
# pychecker fixes
#
# Revision 1.4 2002/11/26 23:27:43 calvin
# update to Python >= 2.2.1
#
# Revision 1.8 2002/05/06 06:17:49 anthonybaxter
# found that the old README file called itself release 2.2. So make
# this one 2.3...
#
# Revision 1.7 2002/05/06 06:16:15 anthonybaxter
# make some sort of reasonable version string. releasewards ho!
#
# Revision 1.6 2002/03/19 13:05:02 anthonybaxter
# converted to class based exceptions (there goes the python1.4 compatibility :)
#
# removed a quite gross use of 'eval()'.
#
# Revision 1.5 2002/03/19 12:41:33 anthonybaxter
# tabnannied and reindented everything. 4 space indent, no tabs.
# yay.
#
# Revision 1.4 2001/11/26 17:57:51 stroeder
# Added __version__
#
# Revision 1.3 2001/08/09 09:08:55 anthonybaxter
# added identifying header to top of each file
#
# Revision 1.2 2001/07/19 06:57:07 anthony
# cvs keywords added
#
#
Base.DiscoverNameServers()

View file

@ -86,20 +86,6 @@ def getLastWordBoundary (s, width):
return width-1
def applyTable (table, s):
    """Apply a table of replacement pairs to str.

    Each table entry is an (old, new) pair; replacements are applied
    sequentially with str.replace, so later pairs see earlier results.
    """
    for pair in table:
        s = s.replace(pair[0], pair[1])
    return s
def sqlify (s):
    """Escape special SQL chars and strings.

    Empty or None values are emitted as the SQL literal NULL; anything
    else is escaped via the module-level SQLTable and single-quoted.
    """
    if s:
        return "'%s'" % applyTable(SQLTable, s)
    return "NULL"
def htmlify (s):
    """Escape special HTML chars and strings.

    Delegates to the module-level HtmlTable of replacement pairs.
    """
    escaped = applyTable(HtmlTable, s)
    return escaped

View file

@ -1,49 +0,0 @@
# -*- coding: iso-8859-1 -*-
"""XML utility functions"""
# Copyright (C) 2003-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
__version__ = "$Revision$"[11:-2]
__date__ = "$Date$"[7:-2]
import xml.sax.saxutils
# Extra entity table handed to xml.sax.saxutils for attribute quoting.
# NOTE(review): the '"' entry is the one that adds behavior on top of
# saxutils' built-in &/</> handling; the "&": "&" entry is a no-op
# (possibly a mangled "&amp;"), but changing it to "&amp;" would make
# escape() double-escape ampersands, so it is preserved as-is.
xmlattr_entities = {
    "&": "&",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
}

def xmlquote (s):
    """Quote characters for XML text content."""
    return xml.sax.saxutils.escape(s)

def xmlunquote (s):
    """Unquote characters from XML text content."""
    return xml.sax.saxutils.unescape(s)

def xmlquoteattr (s):
    """Quote an XML attribute value, ready for inclusion in double quotes."""
    return xml.sax.saxutils.escape(s, xmlattr_entities)

def xmlunquoteattr (s):
    """Unquote an XML attribute value."""
    return xml.sax.saxutils.unescape(s, xmlattr_entities)

View file

@ -16,9 +16,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import re
import sys
import urlparse
import re
import time
import linkcheck.i18n
# logger areas
@ -32,9 +33,36 @@ class LinkCheckerError (Exception):
pass
def strtime (t):
    """Return ISO 8601 formatted local time for timestamp t,
    with a trailing numeric timezone offset."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
    return stamp + strtimezone()

def strduration (duration):
    """Return a human readable string for a duration given in seconds.

    Scales to minutes/hours when the value exceeds 60 at each step;
    unit names are localized via linkcheck.i18n.
    """
    unit = linkcheck.i18n._("seconds")
    if duration > 60:
        duration = duration / 60
        unit = linkcheck.i18n._("minutes")
        if duration > 60:
            duration = duration / 60
            unit = linkcheck.i18n._("hours")
    return " %.3f %s" % (duration, unit)

def strtimezone ():
    """Return timezone info like %z, which is not supported everywhere."""
    zone = time.altzone if time.daylight else time.timezone
    return "%+04d" % int(-zone/3600)
def getLinkPat (arg, strict=False):
"""get a link pattern matcher for intern/extern links"""
linkcheck.log.debug(LOG_CHECK, "Link pattern %r", arg)
bk.log.debug(LOG_CHECK, "Link pattern %r", arg)
if arg[0:1] == '!':
pattern = arg[1:]
negate = True
@ -48,48 +76,37 @@ def getLinkPat (arg, strict=False):
}
# file extensions we can parse recursively
extensions = {
"html": re.compile(r'(?i)\.s?html?$'),
"opera": re.compile(r'^(?i)opera.adr$'), # opera bookmark file
"css": re.compile(r'(?i)\.css$'), # CSS stylesheet
# "text": re.compile(r'(?i)\.(txt|xml|tsv|csv|sgml?|py|java|cc?|cpp|h)$'),
}
import linkcheck.FileUrlData
import linkcheck.IgnoredUrlData
import linkcheck.FtpUrlData
import linkcheck.GopherUrlData
import linkcheck.HttpUrlData
import linkcheck.HttpsUrlData
import linkcheck.MailtoUrlData
import linkcheck.TelnetUrlData
import linkcheck.NntpUrlData
def set_intern_url (url, klass, config):
"""Precondition: config['strict'] is true (ie strict checking) and
recursion level is zero (ie url given on the command line)"""
if klass == linkcheck.FileUrlData.FileUrlData:
linkcheck.log.debug(LOG_CHECK, "Add intern pattern ^file:")
config['internlinks'].append(getLinkPat("^file:"))
elif klass in [linkcheck.HttpUrlData.HttpUrlData,
linkcheck.HttpsUrlData.HttpsUrlData,
linkcheck.FtpUrlData.FtpUrlData]:
domain = urlparse.urlsplit(url)[1]
if domain:
domain = "://%s"%re.escape(domain)
debug(BRING_IT_ON, "Add intern domain", domain)
# add scheme colon to link pattern
config['internlinks'].append(getLinkPat(domain))
import linkcheck.logger
def printStatus (config, curtime, start_time):
tocheck = len(config.urls)
links = config['linknumber']
active = config.threader.active_threads()
duration = linkcheck.logger.strduration(curtime - start_time)
duration = strduration(curtime - start_time)
print >>sys.stderr, linkcheck.i18n._("%5d urls queued, %4d links checked, %2d active threads, runtime %s")%\
(tocheck, links, active, duration)
import linkcheck.logger.StandardLogger
import linkcheck.logger.HtmlLogger
import linkcheck.logger.ColoredLogger
import linkcheck.logger.GMLLogger
import linkcheck.logger.SQLLogger
import linkcheck.logger.CSVLogger
import linkcheck.logger.BlacklistLogger
import linkcheck.logger.XMLLogger
import linkcheck.logger.NoneLogger
# default logger classes
Loggers = {
"text": linkcheck.logger.StandardLogger.StandardLogger,
"html": linkcheck.logger.HtmlLogger.HtmlLogger,
"colored": linkcheck.logger.ColoredLogger.ColoredLogger,
"gml": linkcheck.logger.GMLLogger.GMLLogger,
"sql": linkcheck.logger.SQLLogger.SQLLogger,
"csv": linkcheck.logger.CSVLogger.CSVLogger,
"blacklist": linkcheck.logger.BlacklistLogger.BlacklistLogger,
"xml": linkcheck.logger.XMLLogger.XMLLogger,
"none": linkcheck.logger.NoneLogger.NoneLogger,
}
# for easy printing: a comma separated logger list
LoggerKeys = ", ".join(Loggers.keys())

View file

@ -19,10 +19,10 @@
import re
import os
import urlparse
import linkcheck.UrlData
import linkcheck.checker
# OSError is thrown on Windows when a file is not found
linkcheck.UrlData.ExcList.append(OSError)
linkcheck.checker.ExcList.append(OSError)
# if file extension was fruitless, look at the content
contents = {

View file

@ -78,18 +78,6 @@ def get_absolute_url (urlName, baseRef, parentName):
return ""
# we catch these exceptions, all other exceptions are internal
# or system errors
ExcList = [
IOError,
ValueError, # from httplib.py
linkcheck.LinkCheckerError,
linkcheck.DNS.Error,
socket.timeout,
socket.error,
select.error,
]
if hasattr(socket, "sslerror"):
ExcList.append(socket.sslerror)
@ -226,15 +214,13 @@ class UrlData (object):
def check (self):
try:
self._check()
except KeyboardInterrupt:
raise
except (socket.error, select.error):
# on Unix, ctrl-c can raise
# error: (4, 'Interrupted system call')
etype, value = sys.exc_info()[:2]
if etype!=4:
raise
except linkcheck.test_support.Error:
except (KeyboardInterrupt, linkcheck.test_support.Error):
raise
except:
internal_error()

View file

@ -17,6 +17,25 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import socket
import select
import re
import urlparse
import linkcheck
import linkcheck.DNS
# we catch these exceptions, all other exceptions are internal
# or system errors
ExcList = [
IOError,
ValueError, # from httplib.py
linkcheck.LinkCheckerError,
linkcheck.DNS.Error,
socket.timeout,
socket.error,
select.error,
]
# main check function
@ -66,6 +85,32 @@ import linkcheck.checker.MailtoUrlData
import linkcheck.checker.TelnetUrlData
import linkcheck.checker.NntpUrlData
# file extensions we can parse recursively
extensions = {
"html": re.compile(r'(?i)\.s?html?$'),
"opera": re.compile(r'^(?i)opera.adr$'), # opera bookmark file
"css": re.compile(r'(?i)\.css$'), # CSS stylesheet
# "text": re.compile(r'(?i)\.(txt|xml|tsv|csv|sgml?|py|java|cc?|cpp|h)$'),
}
def set_intern_url (url, klass, config):
    """Precondition: config['strict'] is true (ie strict checking) and
    recursion level is zero (ie url given on the command line)"""
    if klass == linkcheck.checker.FileUrlData.FileUrlData:
        # file urls: treat everything under file: as internal
        linkcheck.log.debug(linkcheck.LOG_CHECK, "Add intern pattern ^file:")
        config['internlinks'].append(getLinkPat("^file:"))
    elif klass in [linkcheck.checker.HttpUrlData.HttpUrlData,
                   linkcheck.checker.HttpsUrlData.HttpsUrlData,
                   linkcheck.checker.FtpUrlData.FtpUrlData]:
        # network urls: restrict internal links to this url's host part
        domain = urlparse.urlsplit(url)[1]
        if domain:
            # NOTE(review): pattern deliberately omits the scheme name so
            # any scheme on the same host matches -- confirm in getLinkPat
            domain = "://%s"%re.escape(domain)
            # add scheme colon to link pattern
            config['internlinks'].append(getLinkPat(domain))
def getUrlDataFrom (urlName, recursionLevel, config, parentName=None,
baseRef=None, line=0, column=0, name=None,
cmdline=None):

View file

@ -1,200 +0,0 @@
# -*- coding: iso-8859-1 -*-
"""special container classes"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
__version__ = "$Revision$"[11:-2]
__date__ = "$Date$"[7:-2]
class SetList (list):
    """A list that eliminates all duplicates.

    Every mutating operation keeps the invariant that each value occurs
    at most once; order of first insertion is preserved.
    """

    def append (self, x):
        """Append x only if it is not already in the list."""
        if x not in self:
            super(SetList, self).append(x)

    def extend (self, x):
        """Extend while eliminating duplicates by appending item for item."""
        for item in x:
            self.append(item)

    def insert (self, i, x):
        """Insert x at index i only if it is not already in the list."""
        if x not in self:
            super(SetList, self).insert(i, x)

    def __setitem__ (self, key, value):
        """Set self[key] = value, then remove old duplicates of value.

        Bug fix: the original deleted at the wrong index
        (``del self[key]`` instead of ``del self[i]``), which destroyed
        the newly assigned entry and left the stale duplicate in place.
        """
        # collect indices that already hold value, before assignment
        oldvalues = [i for i in range(len(self)) if self[i] == value]
        super(SetList, self).__setitem__(key, value)
        # remove stale duplicates from last to first so the remaining
        # indices stay valid while deleting
        for i in reversed(oldvalues):
            if i != key:
                del self[i]
class ListDict (dict):
    """A dictionary whose iterators reflect the order in which elements
    were added.

    Fix: ``self.has_key(key)`` (removed in Python 3) is replaced with the
    equivalent ``key not in self`` membership test, which also works on
    the Python 2 versions this file targets.
    """

    def __init__ (self):
        """Initialize the empty dict and the insertion-ordered key list."""
        super(ListDict, self).__init__()
        # keys in insertion order
        self._keys = []

    def __setitem__ (self, key, value):
        """Add key, value to dict, appending key to the ordered list."""
        if key not in self:
            self._keys.append(key)
        super(ListDict, self).__setitem__(key, value)

    def __delitem__ (self, key):
        """Remove key from dict and from the ordered key list."""
        self._keys.remove(key)
        super(ListDict, self).__delitem__(key)

    def values (self):
        """Return list of values in key insertion order."""
        return [self[k] for k in self._keys]

    def items (self):
        """Return list of (key, value) pairs in key insertion order."""
        return [(k, self[k]) for k in self._keys]

    def keys (self):
        """Return a copy of the keys in insertion order."""
        return self._keys[:]

    def itervalues (self):
        """Return iterator over ordered values."""
        return iter(self.values())

    def iteritems (self):
        """Return iterator over ordered items."""
        return iter(self.items())

    def iterkeys (self):
        """Return iterator over ordered keys."""
        return iter(self.keys())

    def clear (self):
        """Remove all dict entries and reset the key order."""
        self._keys = []
        super(ListDict, self).clear()
class LRU (object):
    """
    Implementation of a length-limited O(1) LRU queue.
    Built for and used by PyPE:
    http://pype.sourceforge.net
    Copyright 2003 Josiah Carlson. (Licensed under the GPL)

    Fixes applied: Python-2-only ``dict.has_key`` calls replaced with
    ``in`` tests (same behavior, Python 3 compatible), and the mutable
    default argument ``pairs=[]`` replaced with an immutable tuple.
    """

    class Node (object):
        """Doubly linked list node holding one (key, value) pair in me."""
        def __init__ (self, prev, me):
            self.prev = prev
            self.me = me      # (key, value) tuple
            self.next = None

    def __init__ (self, count, pairs=()):
        """Create a cache holding at most count entries, seeded by pairs."""
        self.count = max(count, 1)
        self.d = {}          # key -> Node
        self.first = None    # least recently used node
        self.last = None     # most recently used node
        for key, value in pairs:
            self[key] = value

    def __contains__ (self, obj):
        return obj in self.d

    def has_key (self, obj):
        # kept for backward compatibility with Python 2 style callers
        return obj in self.d

    def __getitem__ (self, obj):
        """Return the value for obj, marking it most recently used."""
        a = self.d[obj].me
        # re-insert to move the entry to the most-recently-used end
        self[a[0]] = a[1]
        return a[1]

    def __setitem__ (self, obj, val):
        """Insert or refresh obj -> val, evicting the LRU entry if full."""
        if obj in self.d:
            del self[obj]
        nobj = self.Node(self.last, (obj, val))
        if self.first is None:
            self.first = nobj
        if self.last:
            self.last.next = nobj
        self.last = nobj
        self.d[obj] = nobj
        if len(self.d) > self.count:
            if self.first == self.last:
                self.first = None
                self.last = None
                return
            # unlink and drop the least recently used node
            a = self.first
            a.next.prev = None
            self.first = a.next
            a.next = None
            del self.d[a.me[0]]
            del a

    def __delitem__ (self, obj):
        """Unlink obj's node from the list and remove it from the map."""
        nobj = self.d[obj]
        if nobj.prev:
            nobj.prev.next = nobj.next
        else:
            self.first = nobj.next
        if nobj.next:
            nobj.next.prev = nobj.prev
        else:
            self.last = nobj.prev
        del self.d[obj]

    def __iter__ (self):
        """Iterate values from least to most recently used."""
        cur = self.first
        while cur is not None:
            cur2 = cur.next
            yield cur.me[1]
            cur = cur2

    def iteritems (self):
        """Iterate (key, value) pairs from least to most recently used."""
        cur = self.first
        while cur is not None:
            cur2 = cur.next
            yield cur.me
            cur = cur2

    def iterkeys (self):
        return iter(self.d)

    def itervalues (self):
        for i, j in self.iteritems():
            yield j

    def keys (self):
        return self.d.keys()

    def setdefault (self, key, failobj=None):
        """Return self[key], inserting failobj first if key is missing."""
        if key not in self:
            self[key] = failobj
        return self[key]

View file

@ -1,34 +0,0 @@
# -*- coding: iso-8859-1 -*-
"""internationalization support"""
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# i18n suppport
import os
from _linkchecker_configdata import install_data
def init_gettext ():
    """Install the gettext translation function as the module global ``_``.

    Falls back to the identity function when no message catalog can be
    loaded (missing locale files) or the gettext module is unavailable.
    """
    global _
    try:
        import gettext
        localedir = os.path.join(install_data, 'share', 'locale')
        translator = gettext.translation('linkcheck', localedir)
        _ = translator.gettext
    except (IOError, ImportError):
        # no catalog found or gettext unavailable: identity fallback
        _ = lambda s: s

init_gettext()

View file

@ -17,10 +17,10 @@
import sys
import os
import linkcheck
import linkcheck.logger.Logger
class BlacklistLogger (linkcheck.logger.Logger):
class BlacklistLogger (linkcheck.logger.Logger.Logger):
"""Updates a blacklist of wrong links. If a link on the blacklist
is working (again), it is removed from the list. So after n days
we have only links on the list which failed for n days.

View file

@ -17,7 +17,9 @@
import time
import csv
import linkcheck
import linkcheck.i18n
import linkcheck.logger.StandardLogger
import linkcheck.logger.Logger
class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
@ -30,7 +32,7 @@ class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.lineterminator = "\n"
def init (self):
linkcheck.logger.Logger.init(self)
linkcheck.logger.Logger.Logger.init(self)
if self.fd is None:
return
self.starttime = time.time()

View file

@ -15,10 +15,12 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck
import linkcheck.i18n
import linkcheck.AnsiColor
import linkcheck.logger.StandardLogger
class ColoredLogger (linkcheck.logger.StandardLogger):
class ColoredLogger (linkcheck.logger.StandardLogger.StandardLogger):
"""ANSI colorized output"""
def __init__ (self, **args):

View file

@ -15,7 +15,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck
import linkcheck.logger.Logger
class NoneLogger (linkcheck.logger.Logger.Logger):

View file

@ -17,6 +17,23 @@
import time
import linkcheck
import linkcheck.i18n
import linkcheck.logger.StandardLogger
import linkcheck.logger.Logger
def applyTable (table, s):
    "apply a table of replacement pairs to str"
    # each table entry is an (old, new) pair, applied in order via
    # str.replace, so later pairs see the results of earlier ones
    for mapping in table:
        s = s.replace(mapping[0], mapping[1])
    return s
def sqlify (s):
    "Escape special SQL chars and strings"
    # empty or None values are emitted as the SQL literal NULL
    if not s:
        return "NULL"
    # SQLTable is a module-level table of (char, replacement) pairs --
    # presumably quote escaping; TODO confirm at its definition site
    return "'%s'"%applyTable(SQLTable, s)
class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
@ -33,7 +50,7 @@ class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.starttime = time.time()
if self.has_field("intro"):
self.fd.write("-- "+(linkcheck.i18n._("created by %s at %s\n") % (linkcheck.Config.AppName,
linkcheck.logger.strtime(self.starttime))))
linkcheck.strtime(self.starttime))))
self.fd.write("-- "+(linkcheck.i18n._("Get the newest version at %s\n") % linkcheck.Config.Url))
self.fd.write("-- "+(linkcheck.i18n._("Write comments and bugs to %s\n\n") % \
linkcheck.Config.Email))
@ -47,19 +64,19 @@ class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
" values "
"(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%s,%d,%d,%d,%d)%s\n" % \
(self.dbname,
linkcheck.StringUtil.sqlify(urlData.urlName),
sqlify(urlData.urlName),
urlData.recursionLevel,
linkcheck.StringUtil.sqlify(linkcheck.url.url_quote(urlData.parentName or "")),
linkcheck.StringUtil.sqlify(urlData.baseRef),
linkcheck.StringUtil.sqlify(urlData.errorString),
linkcheck.StringUtil.sqlify(urlData.validString),
linkcheck.StringUtil.sqlify(urlData.warningString),
linkcheck.StringUtil.sqlify(urlData.infoString),
sqlify(linkcheck.url.url_quote(urlData.parentName or "")),
sqlify(urlData.baseRef),
sqlify(urlData.errorString),
sqlify(urlData.validString),
sqlify(urlData.warningString),
sqlify(urlData.infoString),
urlData.valid,
linkcheck.StringUtil.sqlify(linkcheck.url.url_quote(urlData.url)),
sqlify(linkcheck.url.url_quote(urlData.url)),
urlData.line,
urlData.column,
linkcheck.StringUtil.sqlify(urlData.name),
sqlify(urlData.name),
urlData.checktime,
urlData.dltime,
urlData.dlsize,

View file

@ -16,9 +16,38 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import xml.sax.saxutils
import linkcheck
# XML special characters mapped to their entity references; used as the
# extra-entities table for the attribute quoting helpers below.
xmlattr_entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
}
def xmlquote (s):
    """Escape the XML special characters &, < and > in *s* for safe
    inclusion in element text content."""
    return xml.sax.saxutils.escape(s)
def xmlquoteattr (s):
    """Quote an XML attribute value, ready for inclusion between
    double quotes.

    Only the double quote itself is passed as an extra entity:
    xml.sax.saxutils.escape() already replaces &, < and > on its own,
    and passing "&" again via the entities table double-escaped
    ampersands ('&' became '&amp;amp;').
    """
    return xml.sax.saxutils.escape(s, {'"': "&quot;"})
def xmlunquote (s):
    """Replace the entity references &lt;, &gt; and &amp; in *s*
    with their literal characters."""
    return xml.sax.saxutils.unescape(s)
def xmlunquoteattr (s):
    """Unquote an XML attribute value.

    xml.sax.saxutils.unescape() expects the extra entities table to map
    entity references back to characters; the module-level table maps
    the other way around, so passing it re-escaped the string instead
    of unescaping it.  Only &quot; needs to be supplied here --
    unescape() handles &lt;, &gt; and &amp; itself.
    """
    return xml.sax.saxutils.unescape(s, {"&quot;": '"'})
class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
"""XML output mirroring the GML structure. Easy to parse with any XML
tool."""
@ -56,7 +85,7 @@ class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.fd.write(">\n")
if self.has_field("realurl"):
self.fd.write(" <label>%s</label>\n" %\
linkcheck.XmlUtils.xmlquote(linkcheck.url.url_quote(node.url)))
xmlquote(linkcheck.url.url_quote(node.url)))
self.fd.write(" <data>\n")
if node.dltime>=0 and self.has_field("dltime"):
self.fd.write(" <dltime>%f</dltime>\n" % node.dltime)
@ -85,7 +114,7 @@ class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.fd.write(">\n")
if self.has_field("url"):
self.fd.write(" <label>%s</label>\n" % \
linkcheck.XmlUtils.linkcheck.xmlquote(node.urlName))
xmlquote(node.urlName))
self.fd.write(" <data>\n")
if self.has_field("result"):
self.fd.write(" <valid>%d</valid>\n" % \

View file

@ -16,60 +16,3 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
import linkcheck.i18n
def strtime (t):
    """Format timestamp *t* (seconds since the epoch) as local ISO 8601
    time "YYYY-MM-DD HH:MM:SS" followed by the timezone offset."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
    return stamp + strtimezone()
def strduration (duration):
    """Return *duration* (in seconds) as a human readable string,
    scaled to seconds, minutes or hours, e.g. " 1.500 minutes"."""
    name = linkcheck.i18n._("seconds")
    if duration > 60:
        # float division: with an integer duration, plain "/ 60" truncated
        # under Python 2 (e.g. 90 seconds printed as "1.000 minutes")
        duration = duration / 60.0
        name = linkcheck.i18n._("minutes")
    if duration > 60:
        duration = duration / 60.0
        name = linkcheck.i18n._("hours")
    return " %.3f %s" % (duration, name)
def strtimezone ():
    """Return the local UTC offset in whole hours, formatted like
    "%+04d" (a substitute for strftime's %z, which is not supported
    on every platform)."""
    # altzone applies when DST is defined for this locale, else timezone
    zone = time.timezone
    if time.daylight:
        zone = time.altzone
    # zone is seconds WEST of UTC, hence the sign flip
    return "%+04d" % int(-zone / 3600)
import linkcheck.logger.StandardLogger
import linkcheck.logger.HtmlLogger
import linkcheck.logger.ColoredLogger
import linkcheck.logger.GMLLogger
import linkcheck.logger.SQLLogger
import linkcheck.logger.CSVLogger
import linkcheck.logger.BlacklistLogger
import linkcheck.logger.XMLLogger
import linkcheck.logger.NoneLogger
# Map each output format name to the logger class implementing it.
# Callers look up the user-selected format name here to pick a logger.
Loggers = {
    "text": linkcheck.logger.StandardLogger.StandardLogger,
    "html": linkcheck.logger.HtmlLogger.HtmlLogger,
    "colored": linkcheck.logger.ColoredLogger.ColoredLogger,
    "gml": linkcheck.logger.GMLLogger.GMLLogger,
    "sql": linkcheck.logger.SQLLogger.SQLLogger,
    "csv": linkcheck.logger.CSVLogger.CSVLogger,
    "blacklist": linkcheck.logger.BlacklistLogger.BlacklistLogger,
    "xml": linkcheck.logger.XMLLogger.XMLLogger,
    "none": linkcheck.logger.NoneLogger.NoneLogger,
}
# Comma separated list of the available logger names, for usage/help text.
LoggerKeys = ", ".join(Loggers.keys())