mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-05 07:20:58 +00:00
more import fixes
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1364 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
2d3e8a2b9b
commit
6476c8675d
18 changed files with 258 additions and 643 deletions
|
|
@ -1,83 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""ANSI Color definitions and functions"""
|
||||
# Copyright (C) 2000-2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Escape for ANSI colors
|
||||
AnsiEsc = "\x1b[%sm"

# SGR attribute numbers ('light' is SGR 2, i.e. faint/dim intensity)
AnsiType = {
    'bold': '1',
    'light': '2',
    'blink': '5',
    'invert': '7',
}

# SGR color numbers: lowercase names select the foreground color (30-37),
# capitalized names select the background color (40-47)
AnsiColor = {
    'default': '0',
    'black': '30',
    'red': '31',
    'green': '32',
    'yellow': '33',
    'blue': '34',
    'purple': '35',
    'cyan': '36',
    'white': '37',
    'Black': '40',
    'Red': '41',
    'Green': '42',
    'Yellow': '43',
    'Blue': '44',
    'Purple': '45',
    'Cyan': '46',
    'White': '47',
}


def esc_ansicolor (color):
    """convert a named color definition to an escaped ANSI color

    color is either a key of AnsiColor ("red") or "<type>;<color>" where
    <type> is a key of AnsiType ("bold;red"). Unknown names are reported
    on stderr; an unknown type is dropped and an unknown color falls back
    to '0' (reset). Returns the complete escape sequence as a string.
    """
    ctype = ''
    if ";" in color:
        ctype, color = color.split(";", 1)
        if ctype in AnsiType:
            ctype = AnsiType[ctype]+";"
        else:
            sys.stderr.write("invalid ANSI color type %r\n" % (ctype,))
            sys.stderr.write("valid values are %s\n" %
                             (list(AnsiType.keys()),))
            ctype = ''
    if color in AnsiColor:
        cnum = AnsiColor[color]
    else:
        sys.stderr.write("invalid ANSI color name %r\n" % (color,))
        sys.stderr.write("valid values are %s\n" % (list(AnsiColor.keys()),))
        cnum = '0'
    return AnsiEsc % (ctype+cnum)

# escape sequence that switches all attributes off again
AnsiReset = esc_ansicolor("default")


def colorize (text, color=None):
    """return text wrapped in the escape sequence for color followed by
    AnsiReset; text is returned unchanged when color is None or the TERM
    environment variable is unset or empty"""
    if color is None or not os.environ.get('TERM'):
        return text
    return '%s%s%s' % (esc_ansicolor(color), text, AnsiReset)
|
||||
|
|
@ -24,10 +24,11 @@ import Cookie
|
|||
import sets
|
||||
import urllib
|
||||
import _linkchecker_configdata
|
||||
import bk
|
||||
import bk.containers
|
||||
import linkcheck
|
||||
import linkcheck.i18n
|
||||
import linkcheck.log
|
||||
|
||||
import linkcheck.Threader
|
||||
try:
|
||||
import threading
|
||||
except ImportError:
|
||||
|
|
@ -69,7 +70,7 @@ def _check_morsel (m, host, path):
|
|||
return None
|
||||
# check expiry date (if it's stored)
|
||||
if m["expires"]:
|
||||
debug(BRING_IT_ON, "Cookie expires", m["expires"])
|
||||
bk.log.debug(linkcheck.LOG_CHECK, "Cookie expires", m["expires"])
|
||||
# XXX
|
||||
return m.output(header='').strip()
|
||||
|
||||
|
|
@ -90,7 +91,6 @@ class Configuration (dict):
|
|||
# reduceThreads(). Ok, this is a hack but ItWorksForMe(tm).
|
||||
self.reduceCount = 0
|
||||
|
||||
|
||||
def reset (self):
|
||||
"""Reset to default values"""
|
||||
self['linknumber'] = 0
|
||||
|
|
@ -174,55 +174,47 @@ class Configuration (dict):
|
|||
self.setThreads(10)
|
||||
self.urlSeen = sets.Set()
|
||||
self.urlSeenLock = threading.Lock()
|
||||
self.urlCache = linkcheck.containers.LRU(MAX_URL_CACHE)
|
||||
self.urlCache = bk.containers.LRU(MAX_URL_CACHE)
|
||||
self.urlCacheLock = threading.Lock()
|
||||
self.robotsTxtCache = linkcheck.containers.LRU(MAX_ROBOTS_TXT_CACHE)
|
||||
self.robotsTxtCache = bk.containers.LRU(MAX_ROBOTS_TXT_CACHE)
|
||||
self.robotsTxtCacheLock = threading.Lock()
|
||||
self.urls = []
|
||||
self.urlCounter = 0
|
||||
self.urlsLock = threading.Lock()
|
||||
# basic data lock (eg for cookies, link numbers etc.)
|
||||
self.dataLock = threading.Lock()
|
||||
self.cookies = linkcheck.containers.LRU(MAX_COOKIES_CACHE)
|
||||
|
||||
self.cookies = bk.containers.LRU(MAX_COOKIES_CACHE)
|
||||
|
||||
def setThreads (self, num):
|
||||
debug(HURT_ME_PLENTY, "set threading with %d threads"%num)
|
||||
bk.log.debug(linkcheck.LOG_CHECK, "set threading with %d threads"%num)
|
||||
self.threader.threads_max = num
|
||||
if num>0:
|
||||
sys.setcheckinterval(50)
|
||||
else:
|
||||
sys.setcheckinterval(100)
|
||||
|
||||
|
||||
def newLogger (self, logtype, dict={}):
|
||||
args = {}
|
||||
args.update(self[logtype])
|
||||
args.update(dict)
|
||||
from linkcheck.log import Loggers
|
||||
return Loggers[logtype](**args)
|
||||
|
||||
return linkcheck.Loggers[logtype](**args)
|
||||
|
||||
def addLogger(self, logtype, loggerClass, logargs={}):
|
||||
"add a new logger type"
|
||||
from linkcheck.log import Loggers
|
||||
Loggers[logtype] = loggerClass
|
||||
linkcheck.Loggers[logtype] = loggerClass
|
||||
self[logtype] = logargs
|
||||
|
||||
|
||||
def log_init (self):
|
||||
if not self["quiet"]: self["log"].init()
|
||||
for log in self["fileoutput"]:
|
||||
log.init()
|
||||
|
||||
|
||||
def log_endOfOutput (self):
|
||||
if not self["quiet"]:
|
||||
self["log"].endOfOutput(linknumber=self['linknumber'])
|
||||
for log in self["fileoutput"]:
|
||||
log.endOfOutput(linknumber=self['linknumber'])
|
||||
|
||||
|
||||
def incrementLinknumber (self):
|
||||
try:
|
||||
self.dataLock.acquire()
|
||||
|
|
@ -230,19 +222,15 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.dataLock.release()
|
||||
|
||||
|
||||
def hasMoreUrls (self):
|
||||
return self.urls
|
||||
|
||||
|
||||
def finished (self):
|
||||
return self.threader.finished() and not self.urls
|
||||
|
||||
|
||||
def finish (self):
|
||||
self.threader.finish()
|
||||
|
||||
|
||||
def appendUrl (self, urlData):
|
||||
self.urlsLock.acquire()
|
||||
try:
|
||||
|
|
@ -260,7 +248,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.urlsLock.release()
|
||||
|
||||
|
||||
def filterUrlQueue (self):
|
||||
"""remove already cached urls from queue"""
|
||||
# note: url lock must be acquired
|
||||
|
|
@ -270,7 +257,6 @@ class Configuration (dict):
|
|||
print >>sys.stderr, \
|
||||
i18n._("removed %d cached urls from incoming queue")%removed
|
||||
|
||||
|
||||
def getUrl (self):
|
||||
"""get first url in queue and return it"""
|
||||
self.urlsLock.acquire()
|
||||
|
|
@ -281,11 +267,9 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.urlsLock.release()
|
||||
|
||||
|
||||
def checkUrl (self, url):
|
||||
self.threader.start_thread(url.check, ())
|
||||
|
||||
|
||||
def urlSeen_has_key (self, key):
|
||||
self.urlSeenLock.acquire()
|
||||
try:
|
||||
|
|
@ -293,7 +277,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.urlSeenLock.release()
|
||||
|
||||
|
||||
def urlSeen_set (self, key):
|
||||
self.urlSeenLock.acquire()
|
||||
try:
|
||||
|
|
@ -301,7 +284,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.urlSeenLock.release()
|
||||
|
||||
|
||||
def urlCache_has_key (self, key):
|
||||
self.urlCacheLock.acquire()
|
||||
try:
|
||||
|
|
@ -309,7 +291,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.urlCacheLock.release()
|
||||
|
||||
|
||||
def urlCache_get (self, key):
|
||||
self.urlCacheLock.acquire()
|
||||
try:
|
||||
|
|
@ -317,16 +298,14 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.urlCacheLock.release()
|
||||
|
||||
|
||||
def urlCache_set (self, key, val):
|
||||
self.urlCacheLock.acquire()
|
||||
try:
|
||||
debug(NIGHTMARE, "caching", repr(key))
|
||||
bk.log.debug(linkcheck.LOG_CHECK, "caching", repr(key))
|
||||
self.urlCache[key] = val
|
||||
finally:
|
||||
self.urlCacheLock.release()
|
||||
|
||||
|
||||
def robotsTxtCache_has_key (self, key):
|
||||
self.robotsTxtCacheLock.acquire()
|
||||
try:
|
||||
|
|
@ -334,7 +313,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.robotsTxtCacheLock.release()
|
||||
|
||||
|
||||
def robotsTxtCache_get (self, key):
|
||||
self.robotsTxtCacheLock.acquire()
|
||||
try:
|
||||
|
|
@ -342,7 +320,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.robotsTxtCacheLock.release()
|
||||
|
||||
|
||||
def robotsTxtCache_set (self, key, val):
|
||||
self.robotsTxtCacheLock.acquire()
|
||||
try:
|
||||
|
|
@ -350,7 +327,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.robotsTxtCacheLock.release()
|
||||
|
||||
|
||||
def log_newUrl (self, url):
|
||||
self.logLock.acquire()
|
||||
try:
|
||||
|
|
@ -361,25 +337,23 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.logLock.release()
|
||||
|
||||
|
||||
def storeCookies (self, headers, host):
|
||||
self.dataLock.acquire()
|
||||
try:
|
||||
output = []
|
||||
for h in headers.getallmatchingheaders("Set-Cookie"):
|
||||
output.append(h)
|
||||
debug(BRING_IT_ON, "Store Cookie", h)
|
||||
bk.log.debug(linkcheck.LOG_CHECK, "Store Cookie", h)
|
||||
c = self.cookies.setdefault(host, Cookie.SimpleCookie())
|
||||
c.load(h)
|
||||
return output
|
||||
finally:
|
||||
self.dataLock.release()
|
||||
|
||||
|
||||
def getCookies (self, host, path):
|
||||
self.dataLock.acquire()
|
||||
try:
|
||||
debug(BRING_IT_ON, "Get Cookie", host, path)
|
||||
bk.log.debug(linkcheck.LOG_CHECK, "Get Cookie", host, path)
|
||||
if not self.cookies.has_key(host):
|
||||
return []
|
||||
cookievals = []
|
||||
|
|
@ -391,7 +365,6 @@ class Configuration (dict):
|
|||
finally:
|
||||
self.dataLock.release()
|
||||
|
||||
|
||||
def read (self, files = []):
|
||||
cfiles = files[:]
|
||||
if not cfiles:
|
||||
|
|
@ -402,86 +375,107 @@ class Configuration (dict):
|
|||
cfiles.append(norm("~/.linkcheckerrc"))
|
||||
self.readConfig(cfiles)
|
||||
|
||||
|
||||
def readConfig (self, files):
|
||||
"""this big function reads all the configuration parameters
|
||||
used in the linkchecker module."""
|
||||
debug(BRING_IT_ON, "reading configuration from", files)
|
||||
from linkcheck.log import Loggers
|
||||
bk.log.debug(linkcheck.LOG_CHECK, "reading configuration from", files)
|
||||
try:
|
||||
cfgparser = ConfigParser.ConfigParser()
|
||||
cfgparser.read(files)
|
||||
except ConfigParser.Error, msg:
|
||||
debug(BRING_IT_ON, msg)
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
return
|
||||
|
||||
section="output"
|
||||
for key in Loggers.keys():
|
||||
for key in linkcheck.Loggers.keys():
|
||||
if cfgparser.has_section(key):
|
||||
for opt in cfgparser.options(key):
|
||||
try:
|
||||
self[key][opt] = cfgparser.get(key, opt)
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self[key]['fields'] = [f.strip() for f in cfgparser.get(key, 'fields').split(',')]
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
log = cfgparser.get(section, "log")
|
||||
if Loggers.has_key(log):
|
||||
if linkcheck.Loggers.has_key(log):
|
||||
self['log'] = self.newLogger(log)
|
||||
else:
|
||||
warn(i18n._("invalid log option '%s'") % log)
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
if cfgparser.getboolean(section, "verbose"):
|
||||
self["verbose"] = True
|
||||
self["warnings"] = True
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
try: self["quiet"] = cfgparser.getboolean(section, "quiet")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
try: self["status"] = cfgparser.getboolean(section, "status")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
try: self["warnings"] = cfgparser.getboolean(section, "warnings")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["quiet"] = cfgparser.getboolean(section, "quiet")
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["status"] = cfgparser.getboolean(section, "status")
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["warnings"] = cfgparser.getboolean(section, "warnings")
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
filelist = cfgparser.get(section, "fileoutput").split(",")
|
||||
for arg in filelist:
|
||||
arg = arg.strip()
|
||||
# no file output for the blacklist and none Logger
|
||||
if Loggers.has_key(arg) and arg not in ["blacklist", "none"]:
|
||||
if linkcheck.Loggers.has_key(arg) and arg not in ["blacklist", "none"]:
|
||||
self['fileoutput'].append(
|
||||
self.newLogger(arg, {'fileoutput':1}))
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
|
||||
section="checking"
|
||||
try:
|
||||
num = cfgparser.getint(section, "threads")
|
||||
self.setThreads(num)
|
||||
except ConfigParser.Error: debug(NIGHTMARE, msg)
|
||||
try: self["anchors"] = cfgparser.getboolean(section, "anchors")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["anchors"] = cfgparser.getboolean(section, "anchors")
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
num = cfgparser.getint(section, "recursionlevel")
|
||||
self["recursionlevel"] = num
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
try: self["strict"] = cfgparser.getboolean(section, "strict")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["strict"] = cfgparser.getboolean(section, "strict")
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
wr = cfgparser.get(section, "warningregex")
|
||||
if wr:
|
||||
self["warningregex"] = re.compile(wr)
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
try: self["warnsizebytes"] = int(cfgparser.get(section, "warnsizebytes"))
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["warnsizebytes"] = int(cfgparser.get(section, "warnsizebytes"))
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["nntpserver"] = cfgparser.get(section, "nntpserver")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["interactive"] = cfgparser.getboolean(section, "interactive")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["anchorcaching"] = cfgparser.getboolean(section, "anchorcaching")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
|
||||
section = "authentication"
|
||||
try:
|
||||
|
|
@ -494,7 +488,8 @@ class Configuration (dict):
|
|||
'user': auth[1],
|
||||
'password': auth[2]})
|
||||
i += 1
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
|
||||
section = "filtering"
|
||||
try:
|
||||
|
|
@ -506,8 +501,13 @@ class Configuration (dict):
|
|||
break
|
||||
self["externlinks"].append(linkcheck.getLinkPat(ctuple[0], strict=int(ctuple[1])))
|
||||
i += 1
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
try: self["internlinks"].append(linkcheck.getLinkPat(cfgparser.get(section, "internlinks")))
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
try: self["denyallow"] = cfgparser.getboolean(section, "denyallow")
|
||||
except ConfigParser.Error, msg: debug(NIGHTMARE, msg)
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["internlinks"].append(linkcheck.getLinkPat(cfgparser.get(section, "internlinks")))
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["denyallow"] = cfgparser.getboolean(section, "denyallow")
|
||||
except ConfigParser.Error, msg:
|
||||
bk.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
|
|
|
|||
|
|
@ -11,57 +11,11 @@
|
|||
|
||||
__version__ = '2.3.0'
|
||||
|
||||
import Type, Opcode, Status, Class
|
||||
from Base import DnsRequest, DNSError
|
||||
from Lib import DnsResult
|
||||
Error=DNSError
|
||||
from lazy import *
|
||||
Request = DnsRequest
|
||||
Result = DnsResult
|
||||
import Base
|
||||
import Lib
|
||||
|
||||
import linkcheck.DNS.Base
|
||||
linkcheck.DNS.Base.DiscoverNameServers()
|
||||
Error = Base.DNSError
|
||||
Request = Base.DnsRequest
|
||||
Result = Lib.DnsResult
|
||||
|
||||
#
|
||||
# $Log$
|
||||
# Revision 1.8 2004/07/07 18:01:59 calvin
|
||||
# new module layout
|
||||
#
|
||||
# Revision 1.7 2003/07/04 14:23:22 calvin
|
||||
# add coding line
|
||||
#
|
||||
# Revision 1.6 2003/01/05 17:52:53 calvin
|
||||
# fix
|
||||
#
|
||||
# Revision 1.5 2003/01/05 17:39:19 calvin
|
||||
# pychecker fixes
|
||||
#
|
||||
# Revision 1.4 2002/11/26 23:27:43 calvin
|
||||
# update to Python >= 2.2.1
|
||||
#
|
||||
# Revision 1.8 2002/05/06 06:17:49 anthonybaxter
|
||||
# found that the old README file called itself release 2.2. So make
|
||||
# this one 2.3...
|
||||
#
|
||||
# Revision 1.7 2002/05/06 06:16:15 anthonybaxter
|
||||
# make some sort of reasonable version string. releasewards ho!
|
||||
#
|
||||
# Revision 1.6 2002/03/19 13:05:02 anthonybaxter
|
||||
# converted to class based exceptions (there goes the python1.4 compatibility :)
|
||||
#
|
||||
# removed a quite gross use of 'eval()'.
|
||||
#
|
||||
# Revision 1.5 2002/03/19 12:41:33 anthonybaxter
|
||||
# tabnannied and reindented everything. 4 space indent, no tabs.
|
||||
# yay.
|
||||
#
|
||||
# Revision 1.4 2001/11/26 17:57:51 stroeder
|
||||
# Added __version__
|
||||
#
|
||||
# Revision 1.3 2001/08/09 09:08:55 anthonybaxter
|
||||
# added identifying header to top of each file
|
||||
#
|
||||
# Revision 1.2 2001/07/19 06:57:07 anthony
|
||||
# cvs keywords added
|
||||
#
|
||||
#
|
||||
Base.DiscoverNameServers()
|
||||
|
|
|
|||
|
|
@ -86,20 +86,6 @@ def getLastWordBoundary (s, width):
|
|||
return width-1
|
||||
|
||||
|
||||
def applyTable (table, s):
|
||||
"apply a table of replacement pairs to str"
|
||||
for mapping in table:
|
||||
s = s.replace(mapping[0], mapping[1])
|
||||
return s
|
||||
|
||||
|
||||
def sqlify (s):
|
||||
"Escape special SQL chars and strings"
|
||||
if not s:
|
||||
return "NULL"
|
||||
return "'%s'"%applyTable(SQLTable, s)
|
||||
|
||||
|
||||
def htmlify (s):
|
||||
"Escape special HTML chars and strings"
|
||||
return applyTable(HtmlTable, s)
|
||||
|
|
|
|||
|
|
@ -1,49 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""XML utility functions"""
|
||||
# Copyright (C) 2003-2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
__version__ = "$Revision$"[11:-2]
|
||||
__date__ = "$Date$"[7:-2]
|
||||
|
||||
import xml.sax.saxutils
|
||||
|
||||
# characters that need quoting inside a double-quoted XML attribute,
# mapped to their entity references
xmlattr_entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
}

# Extra replacements handed to xml.sax.saxutils. escape() and unescape()
# already treat &, < and > themselves; listing them again would escape
# the ampersand of every generated entity a second time, so only the
# double quote is passed on. unescape() needs the reverse direction
# (entity -> character).
_attr_quote = {"\"": "&quot;"}
_attr_unquote = {"&quot;": "\""}


def xmlquote (s):
    """quote characters for XML; replaces &, < and > in s with entities"""
    return xml.sax.saxutils.escape(s)


def xmlquoteattr (s):
    """quote XML attribute, ready for inclusion with double quotes;
    replaces &, <, > and the double quote in s with entities"""
    return xml.sax.saxutils.escape(s, _attr_quote)


def xmlunquote (s):
    """unquote characters from XML; reverses xmlquote()"""
    return xml.sax.saxutils.unescape(s)


def xmlunquoteattr (s):
    """unquote attributes from XML; reverses xmlquoteattr()"""
    return xml.sax.saxutils.unescape(s, _attr_unquote)
|
||||
|
|
@ -16,9 +16,10 @@
|
|||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import re
|
||||
import sys
|
||||
import urlparse
|
||||
import re
|
||||
import time
|
||||
import linkcheck.i18n
|
||||
|
||||
|
||||
# logger areas
|
||||
|
|
@ -32,9 +33,36 @@ class LinkCheckerError (Exception):
|
|||
pass
|
||||
|
||||
|
||||
def strtime (t):
    """return ISO 8601 formatted local time for the timestamp t (seconds
    since the epoch), followed by the UTC offset valid at that time"""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t)) + \
           strtimezone(t)


def strduration (duration):
    """return string formatted time duration

    duration is a number of seconds; values above 60 are scaled to
    minutes and, if still above 60, to hours. The result has the form
    " <value with three decimals> <translated unit name>".
    """
    name = linkcheck.i18n._("seconds")
    if duration > 60:
        # divide by a float so integer durations are not truncated
        duration = duration / 60.0
        name = linkcheck.i18n._("minutes")
    if duration > 60:
        duration = duration / 60.0
        name = linkcheck.i18n._("hours")
    return " %.3f %s"%(duration, name)


def strtimezone (t=None):
    """return timezone info, %z on some platforms, but not supported on all

    t is the timestamp (seconds since the epoch) the offset is wanted
    for; the current time is used when it is omitted. The result has the
    form +HHMM or -HHMM, like %z.
    """
    if t is None:
        t = time.time()
    # time.daylight only says that a DST rule exists for the zone;
    # tm_isdst says whether DST is in effect at the given time
    if time.daylight and time.localtime(t).tm_isdst > 0:
        zone = time.altzone
    else:
        zone = time.timezone
    # time.timezone/altzone count seconds WEST of UTC, so a positive
    # value is a negative UTC offset
    if zone > 0:
        sign = "-"
    else:
        sign = "+"
    hours, seconds = divmod(abs(zone), 3600)
    return "%s%02d%02d" % (sign, hours, seconds // 60)
|
||||
|
||||
|
||||
def getLinkPat (arg, strict=False):
|
||||
"""get a link pattern matcher for intern/extern links"""
|
||||
linkcheck.log.debug(LOG_CHECK, "Link pattern %r", arg)
|
||||
bk.log.debug(LOG_CHECK, "Link pattern %r", arg)
|
||||
if arg[0:1] == '!':
|
||||
pattern = arg[1:]
|
||||
negate = True
|
||||
|
|
@ -48,48 +76,37 @@ def getLinkPat (arg, strict=False):
|
|||
}
|
||||
|
||||
|
||||
# file extensions we can parse recursively
|
||||
extensions = {
    "html": re.compile(r'(?i)\.s?html?$'),
    # the inline flag has to lead the pattern, and the dot is escaped so
    # that only the literal file name "opera.adr" matches
    "opera": re.compile(r'(?i)^opera\.adr$'), # opera bookmark file
    "css": re.compile(r'(?i)\.css$'), # CSS stylesheet
#    "text": re.compile(r'(?i)\.(txt|xml|tsv|csv|sgml?|py|java|cc?|cpp|h)$'),
}
|
||||
|
||||
|
||||
import linkcheck.FileUrlData
|
||||
import linkcheck.IgnoredUrlData
|
||||
import linkcheck.FtpUrlData
|
||||
import linkcheck.GopherUrlData
|
||||
import linkcheck.HttpUrlData
|
||||
import linkcheck.HttpsUrlData
|
||||
import linkcheck.MailtoUrlData
|
||||
import linkcheck.TelnetUrlData
|
||||
import linkcheck.NntpUrlData
|
||||
|
||||
def set_intern_url (url, klass, config):
|
||||
"""Precondition: config['strict'] is true (ie strict checking) and
|
||||
recursion level is zero (ie url given on the command line)"""
|
||||
if klass == linkcheck.FileUrlData.FileUrlData:
|
||||
linkcheck.log.debug(LOG_CHECK, "Add intern pattern ^file:")
|
||||
config['internlinks'].append(getLinkPat("^file:"))
|
||||
elif klass in [linkcheck.HttpUrlData.HttpUrlData,
|
||||
linkcheck.HttpsUrlData.HttpsUrlData,
|
||||
linkcheck.FtpUrlData.FtpUrlData]:
|
||||
domain = urlparse.urlsplit(url)[1]
|
||||
if domain:
|
||||
domain = "://%s"%re.escape(domain)
|
||||
debug(BRING_IT_ON, "Add intern domain", domain)
|
||||
# add scheme colon to link pattern
|
||||
config['internlinks'].append(getLinkPat(domain))
|
||||
|
||||
|
||||
import linkcheck.logger
|
||||
|
||||
def printStatus (config, curtime, start_time):
|
||||
tocheck = len(config.urls)
|
||||
links = config['linknumber']
|
||||
active = config.threader.active_threads()
|
||||
duration = linkcheck.logger.strduration(curtime - start_time)
|
||||
duration = strduration(curtime - start_time)
|
||||
print >>sys.stderr, linkcheck.i18n._("%5d urls queued, %4d links checked, %2d active threads, runtime %s")%\
|
||||
(tocheck, links, active, duration)
|
||||
|
||||
|
||||
import linkcheck.logger.StandardLogger
|
||||
import linkcheck.logger.HtmlLogger
|
||||
import linkcheck.logger.ColoredLogger
|
||||
import linkcheck.logger.GMLLogger
|
||||
import linkcheck.logger.SQLLogger
|
||||
import linkcheck.logger.CSVLogger
|
||||
import linkcheck.logger.BlacklistLogger
|
||||
import linkcheck.logger.XMLLogger
|
||||
import linkcheck.logger.NoneLogger
|
||||
|
||||
|
||||
# default logger classes
|
||||
Loggers = {
|
||||
"text": linkcheck.logger.StandardLogger.StandardLogger,
|
||||
"html": linkcheck.logger.HtmlLogger.HtmlLogger,
|
||||
"colored": linkcheck.logger.ColoredLogger.ColoredLogger,
|
||||
"gml": linkcheck.logger.GMLLogger.GMLLogger,
|
||||
"sql": linkcheck.logger.SQLLogger.SQLLogger,
|
||||
"csv": linkcheck.logger.CSVLogger.CSVLogger,
|
||||
"blacklist": linkcheck.logger.BlacklistLogger.BlacklistLogger,
|
||||
"xml": linkcheck.logger.XMLLogger.XMLLogger,
|
||||
"none": linkcheck.logger.NoneLogger.NoneLogger,
|
||||
}
|
||||
# for easy printing: a comma separated logger list
|
||||
LoggerKeys = ", ".join(Loggers.keys())
|
||||
|
|
|
|||
|
|
@ -19,10 +19,10 @@
|
|||
import re
|
||||
import os
|
||||
import urlparse
|
||||
import linkcheck.UrlData
|
||||
import linkcheck.checker
|
||||
|
||||
# OSError is thrown on Windows when a file is not found
|
||||
linkcheck.UrlData.ExcList.append(OSError)
|
||||
linkcheck.checker.ExcList.append(OSError)
|
||||
|
||||
# if file extension was fruitless, look at the content
|
||||
contents = {
|
||||
|
|
|
|||
|
|
@ -78,18 +78,6 @@ def get_absolute_url (urlName, baseRef, parentName):
|
|||
return ""
|
||||
|
||||
|
||||
# we catch these exceptions, all other exceptions are internal
|
||||
# or system errors
|
||||
ExcList = [
|
||||
IOError,
|
||||
ValueError, # from httplib.py
|
||||
linkcheck.LinkCheckerError,
|
||||
linkcheck.DNS.Error,
|
||||
socket.timeout,
|
||||
socket.error,
|
||||
select.error,
|
||||
]
|
||||
|
||||
if hasattr(socket, "sslerror"):
|
||||
ExcList.append(socket.sslerror)
|
||||
|
||||
|
|
@ -226,15 +214,13 @@ class UrlData (object):
|
|||
def check (self):
|
||||
try:
|
||||
self._check()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except (socket.error, select.error):
|
||||
# on Unix, ctrl-c can raise
|
||||
# error: (4, 'Interrupted system call')
|
||||
etype, value = sys.exc_info()[:2]
|
||||
if etype!=4:
|
||||
raise
|
||||
except linkcheck.test_support.Error:
|
||||
except (KeyboardInterrupt, linkcheck.test_support.Error):
|
||||
raise
|
||||
except:
|
||||
internal_error()
|
||||
|
|
|
|||
|
|
@ -17,6 +17,25 @@
|
|||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import time
|
||||
import socket
|
||||
import select
|
||||
import re
|
||||
import urlparse
|
||||
import linkcheck
|
||||
import linkcheck.DNS
|
||||
|
||||
|
||||
# we catch these exceptions, all other exceptions are internal
|
||||
# or system errors
|
||||
ExcList = [
|
||||
IOError,
|
||||
ValueError, # from httplib.py
|
||||
linkcheck.LinkCheckerError,
|
||||
linkcheck.DNS.Error,
|
||||
socket.timeout,
|
||||
socket.error,
|
||||
select.error,
|
||||
]
|
||||
|
||||
|
||||
# main check function
|
||||
|
|
@ -66,6 +85,32 @@ import linkcheck.checker.MailtoUrlData
|
|||
import linkcheck.checker.TelnetUrlData
|
||||
import linkcheck.checker.NntpUrlData
|
||||
|
||||
# file extensions we can parse recursively
|
||||
extensions = {
    "html": re.compile(r'(?i)\.s?html?$'),
    # the inline flag has to lead the pattern, and the dot is escaped so
    # that only the literal file name "opera.adr" matches
    "opera": re.compile(r'(?i)^opera\.adr$'), # opera bookmark file
    "css": re.compile(r'(?i)\.css$'), # CSS stylesheet
#    "text": re.compile(r'(?i)\.(txt|xml|tsv|csv|sgml?|py|java|cc?|cpp|h)$'),
}
|
||||
|
||||
|
||||
def set_intern_url (url, klass, config):
    """Precondition: config['strict'] is true (ie strict checking) and
    recursion level is zero (ie url given on the command line)

    Appends a link pattern to config['internlinks']: "^file:" when klass
    is the file URL class, or "://<escaped domain>" of url when klass is
    the http, https or ftp URL class and url has a network location.
    Other classes leave config untouched.
    """
    # getLinkPat is referenced through the linkcheck package, as the
    # other call sites do; a bare getLinkPat is not among the names this
    # module is seen to import.
    # NOTE(review): nesting below is reconstructed from a listing that
    # lost its indentation - confirm against the repository.
    if klass == linkcheck.checker.FileUrlData.FileUrlData:
        linkcheck.log.debug(linkcheck.LOG_CHECK, "Add intern pattern ^file:")
        config['internlinks'].append(linkcheck.getLinkPat("^file:"))
    elif klass in [linkcheck.checker.HttpUrlData.HttpUrlData,
                   linkcheck.checker.HttpsUrlData.HttpsUrlData,
                   linkcheck.checker.FtpUrlData.FtpUrlData]:
        domain = urlparse.urlsplit(url)[1]
        if domain:
            domain = "://%s"%re.escape(domain)
            linkcheck.log.debug(linkcheck.LOG_CHECK, "Add intern domain", domain)
            # add scheme colon to link pattern
            config['internlinks'].append(linkcheck.getLinkPat(domain))
|
||||
|
||||
|
||||
def getUrlDataFrom (urlName, recursionLevel, config, parentName=None,
|
||||
baseRef=None, line=0, column=0, name=None,
|
||||
cmdline=None):
|
||||
|
|
|
|||
|
|
@ -1,200 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""special container classes"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
__version__ = "$Revision$"[11:-2]
|
||||
__date__ = "$Date$"[7:-2]
|
||||
|
||||
|
||||
class SetList (list):
    """a list that eliminates all duplicates

    Only append(), extend(), insert() and item assignment with an
    integer index are guarded; other list operations are inherited
    unchanged.
    """

    def append (self, x):
        """append only if not already there"""
        if x not in self:
            super(SetList, self).append(x)

    def extend (self, x):
        """extend while eliminating duplicates by appending item for item"""
        for i in x:
            self.append(i)

    def insert (self, i, x):
        """insert only if not already there"""
        if x not in self:
            super(SetList, self).insert(i, x)

    def __setitem__ (self, key, value):
        """set new value, and eliminate old duplicates (if any)"""
        # positions that hold an equal value before the assignment
        oldvalues = [i for i in range(len(self)) if self[i] == value]
        super(SetList, self).__setitem__(key, value)
        # make a negative index comparable with the positions above
        if isinstance(key, int) and key < 0:
            key += len(self)
        # remove old duplicates (from last to first, so the remaining
        # positions stay valid), keeping the element just assigned
        oldvalues.reverse()
        for i in oldvalues:
            if i != key:
                del self[i]
|
||||
|
||||
|
||||
class ListDict (dict):
    """a dictionary whose iterators reflect the order in which elements
    were added

    The order is tracked in self._keys. Only the methods overridden in
    this class keep that list up to date; other inherited mutators
    (for example update() or pop()) are not overridden here.
    """

    def __init__ (self):
        """initialize the dict and the ordered key list"""
        super(ListDict, self).__init__()
        # keys in the order they were first added
        self._keys = []

    def __setitem__ (self, key, value):
        """add key,value to dict, append key to the ordered list if new"""
        if key not in self:
            self._keys.append(key)
        super(ListDict, self).__setitem__(key, value)

    def __delitem__ (self, key):
        """remove key from dict; raises KeyError if key is missing"""
        # delete from the dict first: a missing key then raises KeyError
        # and leaves the ordered key list untouched
        super(ListDict, self).__delitem__(key)
        self._keys.remove(key)

    def __iter__ (self):
        """return iterator over keys in insertion order"""
        return self.iterkeys()

    def values (self):
        """return list of values in insertion order"""
        return [self[k] for k in self._keys]

    def items (self):
        """return list of (key, value) items in insertion order"""
        return [(k, self[k]) for k in self._keys]

    def keys (self):
        """return a copy of the key list in insertion order"""
        return self._keys[:]

    def itervalues (self):
        """return iterator over values in insertion order"""
        return iter(self.values())

    def iteritems (self):
        """return iterator over items in insertion order"""
        return iter(self.items())

    def iterkeys (self):
        """return iterator over keys in insertion order"""
        return iter(self.keys())

    def clear (self):
        """remove all dict entries"""
        self._keys = []
        super(ListDict, self).clear()
|
||||
|
||||
|
||||
class LRU (object):
    """
    Implementation of a length-limited O(1) LRU queue.
    Built for and used by PyPE:
    http://pype.sourceforge.net
    Copyright 2003 Josiah Carlson. (Licensed under the GPL)

    Entries live in a doubly linked list running from self.first
    (least recently used) to self.last (most recently used); self.d maps
    each key to its list node. Reading an entry with [] counts as a use
    and moves it to the most-recent end.
    """

    class Node (object):
        """linked list cell; me is the stored (key, value) tuple"""

        def __init__ (self, prev, me):
            self.prev = prev
            self.me = me
            self.next = None

    def __init__ (self, count, pairs=()):
        """create a queue holding at most count entries (at least one),
        optionally filled from an iterable of (key, value) pairs"""
        self.count = max(count, 1)
        self.d = {}
        self.first = None
        self.last = None
        for key, value in pairs:
            self[key] = value

    def __contains__ (self, obj):
        """return True if obj is a stored key; does not count as a use"""
        return obj in self.d

    def has_key (self, obj):
        """same as the in operator"""
        return obj in self.d

    def __getitem__ (self, obj):
        """return the value for key obj and mark it most recently used;
        raises KeyError if obj is not stored"""
        key, value = self.d[obj].me
        # re-inserting moves the entry to the most-recent end
        self[key] = value
        return value

    def __setitem__ (self, obj, val):
        """store val under key obj as the most recently used entry and
        drop the least recently used entry if the limit is exceeded"""
        if obj in self.d:
            del self[obj]
        nobj = self.Node(self.last, (obj, val))
        if self.first is None:
            self.first = nobj
        if self.last is not None:
            self.last.next = nobj
        self.last = nobj
        self.d[obj] = nobj
        if len(self.d) > self.count:
            # count is at least 1, so there are at least two nodes here
            # and the oldest one always has a successor
            oldest = self.first
            oldest.next.prev = None
            self.first = oldest.next
            oldest.next = None
            del self.d[oldest.me[0]]

    def __delitem__ (self, obj):
        """remove key obj; raises KeyError if obj is not stored"""
        nobj = self.d[obj]
        if nobj.prev is not None:
            nobj.prev.next = nobj.next
        else:
            self.first = nobj.next
        if nobj.next is not None:
            nobj.next.prev = nobj.prev
        else:
            self.last = nobj.prev
        del self.d[obj]

    def __iter__ (self):
        """iterate over the stored values (not the keys), least recently
        used first"""
        cur = self.first
        while cur is not None:
            # fetch the successor before yielding, the consumer may
            # modify the queue while iterating
            cur2 = cur.next
            yield cur.me[1]
            cur = cur2

    def iteritems (self):
        """iterate over (key, value) tuples, least recently used first"""
        cur = self.first
        while cur is not None:
            cur2 = cur.next
            yield cur.me
            cur = cur2

    def iterkeys (self):
        """iterate over the keys in the order of the internal dict"""
        return iter(self.d)

    def itervalues (self):
        """iterate over the values, least recently used first"""
        for i, j in self.iteritems():
            yield j

    def keys (self):
        """return the keys of the internal dict"""
        return self.d.keys()

    def setdefault (self, key, failobj=None):
        """store failobj under key if key is missing, then return the
        value for key (which marks it most recently used)"""
        if key not in self:
            self[key] = failobj
        return self[key]
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""internationalization support"""
|
||||
# Copyright (C) 2000-2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
# i18n support
|
||||
import os
|
||||
from _linkchecker_configdata import install_data
|
||||
|
||||
def init_gettext ():
    """Bind the module-level name _ to a translation function.

    Looks up the 'linkcheck' message catalog below
    <install_data>/share/locale. If the gettext module cannot be
    imported or the catalog cannot be opened, _ becomes a function that
    returns its argument unchanged.
    """
    global _
    try:
        import gettext
        catalog = gettext.translation(
            'linkcheck', os.path.join(install_data, 'share', 'locale'))
    except (IOError, ImportError):
        # default gettext function
        _ = lambda s: s
    else:
        _ = catalog.gettext
|
||||
|
||||
init_gettext()
|
||||
|
|
@ -17,10 +17,10 @@
|
|||
|
||||
import sys
|
||||
import os
|
||||
import linkcheck
|
||||
import linkcheck.logger.Logger
|
||||
|
||||
|
||||
class BlacklistLogger (linkcheck.logger.Logger):
|
||||
class BlacklistLogger (linkcheck.logger.Logger.Logger):
|
||||
"""Updates a blacklist of wrong links. If a link on the blacklist
|
||||
is working (again), it is removed from the list. So after n days
|
||||
we have only links on the list which failed for n days.
|
||||
|
|
|
|||
|
|
@ -17,7 +17,9 @@
|
|||
|
||||
import time
|
||||
import csv
|
||||
import linkcheck
|
||||
import linkcheck.i18n
|
||||
import linkcheck.logger.StandardLogger
|
||||
import linkcheck.logger.Logger
|
||||
|
||||
|
||||
class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
||||
|
|
@ -30,7 +32,7 @@ class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
|||
self.lineterminator = "\n"
|
||||
|
||||
def init (self):
|
||||
linkcheck.logger.Logger.init(self)
|
||||
linkcheck.logger.Logger.Logger.init(self)
|
||||
if self.fd is None:
|
||||
return
|
||||
self.starttime = time.time()
|
||||
|
|
|
|||
|
|
@ -15,10 +15,12 @@
|
|||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import linkcheck
|
||||
import linkcheck.i18n
|
||||
import linkcheck.AnsiColor
|
||||
import linkcheck.logger.StandardLogger
|
||||
|
||||
|
||||
class ColoredLogger (linkcheck.logger.StandardLogger):
|
||||
class ColoredLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
||||
"""ANSI colorized output"""
|
||||
|
||||
def __init__ (self, **args):
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import linkcheck
|
||||
import linkcheck.logger.Logger
|
||||
|
||||
|
||||
class NoneLogger (linkcheck.logger.Logger.Logger):
|
||||
|
|
|
|||
|
|
@ -17,6 +17,23 @@
|
|||
|
||||
import time
|
||||
import linkcheck
|
||||
import linkcheck.i18n
|
||||
import linkcheck.logger.StandardLogger
|
||||
import linkcheck.logger.Logger
|
||||
|
||||
|
||||
def applyTable (table, s):
    """Apply a table of replacement pairs to the string s.

    table is a sequence whose entries hold the search string at index 0
    and its replacement at index 1. The replacements are applied one
    after another in table order, each on the result of the previous
    one. Returns the resulting string.
    """
    result = s
    for pair in table:
        search, replacement = pair[0], pair[1]
        result = result.replace(search, replacement)
    return result
|
||||
|
||||
|
||||
def sqlify (s):
    """Return s as an SQL literal.

    A false value (None or the empty string) yields the bare word NULL.
    Anything else is passed through applyTable() with SQLTable and
    wrapped in single quotes.
    """
    # NOTE(review): SQLTable is not defined in the visible part of this
    # module - confirm it exists at module level.
    if s:
        return "'%s'" % applyTable(SQLTable, s)
    return "NULL"
|
||||
|
||||
|
||||
class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
||||
|
|
@ -33,7 +50,7 @@ class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
|||
self.starttime = time.time()
|
||||
if self.has_field("intro"):
|
||||
self.fd.write("-- "+(linkcheck.i18n._("created by %s at %s\n") % (linkcheck.Config.AppName,
|
||||
linkcheck.logger.strtime(self.starttime))))
|
||||
linkcheck.strtime(self.starttime))))
|
||||
self.fd.write("-- "+(linkcheck.i18n._("Get the newest version at %s\n") % linkcheck.Config.Url))
|
||||
self.fd.write("-- "+(linkcheck.i18n._("Write comments and bugs to %s\n\n") % \
|
||||
linkcheck.Config.Email))
|
||||
|
|
@ -47,19 +64,19 @@ class SQLLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
|||
" values "
|
||||
"(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%s,%d,%d,%d,%d)%s\n" % \
|
||||
(self.dbname,
|
||||
linkcheck.StringUtil.sqlify(urlData.urlName),
|
||||
sqlify(urlData.urlName),
|
||||
urlData.recursionLevel,
|
||||
linkcheck.StringUtil.sqlify(linkcheck.url.url_quote(urlData.parentName or "")),
|
||||
linkcheck.StringUtil.sqlify(urlData.baseRef),
|
||||
linkcheck.StringUtil.sqlify(urlData.errorString),
|
||||
linkcheck.StringUtil.sqlify(urlData.validString),
|
||||
linkcheck.StringUtil.sqlify(urlData.warningString),
|
||||
linkcheck.StringUtil.sqlify(urlData.infoString),
|
||||
sqlify(linkcheck.url.url_quote(urlData.parentName or "")),
|
||||
sqlify(urlData.baseRef),
|
||||
sqlify(urlData.errorString),
|
||||
sqlify(urlData.validString),
|
||||
sqlify(urlData.warningString),
|
||||
sqlify(urlData.infoString),
|
||||
urlData.valid,
|
||||
linkcheck.StringUtil.sqlify(linkcheck.url.url_quote(urlData.url)),
|
||||
sqlify(linkcheck.url.url_quote(urlData.url)),
|
||||
urlData.line,
|
||||
urlData.column,
|
||||
linkcheck.StringUtil.sqlify(urlData.name),
|
||||
sqlify(urlData.name),
|
||||
urlData.checktime,
|
||||
urlData.dltime,
|
||||
urlData.dlsize,
|
||||
|
|
|
|||
|
|
@ -16,9 +16,38 @@
|
|||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import time
|
||||
import xml.sax.saxutils
|
||||
import linkcheck
|
||||
|
||||
|
||||
# Characters that are special inside a double-quoted XML attribute value
# and the entity reference that stands for each of them.
xmlattr_entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
}

# xml.sax.saxutils.escape() and unescape() already handle &, < and >
# themselves. Only the double quote has to be passed in addition;
# handing them the full table above would escape the ampersand twice.
_attr_quote_entities = {"\"": "&quot;"}
# unescape() expects the reverse direction: entity reference -> character
_attr_unquote_entities = {"&quot;": "\""}


def xmlquote (s):
    """quote characters for XML

    Replaces &, < and > in s with their entity references.
    """
    return xml.sax.saxutils.escape(s)


def xmlquoteattr (s):
    """quote XML attribute, ready for inclusion with double quotes

    Replaces &, <, > and the double quote in s with their entity
    references. The surrounding quotes are not added.
    """
    return xml.sax.saxutils.escape(s, _attr_quote_entities)


def xmlunquote (s):
    """unquote characters from XML

    Reverses xmlquote(): replaces the entity references for &, < and >
    in s with the characters themselves.
    """
    return xml.sax.saxutils.unescape(s)


def xmlunquoteattr (s):
    """unquote attributes from XML

    Reverses xmlquoteattr(): replaces the entity references for &, <, >
    and the double quote in s with the characters themselves.
    """
    return xml.sax.saxutils.unescape(s, _attr_unquote_entities)
|
||||
|
||||
|
||||
class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
||||
"""XML output mirroring the GML structure. Easy to parse with any XML
|
||||
tool."""
|
||||
|
|
@ -56,7 +85,7 @@ class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
|||
self.fd.write(">\n")
|
||||
if self.has_field("realurl"):
|
||||
self.fd.write(" <label>%s</label>\n" %\
|
||||
linkcheck.XmlUtils.xmlquote(linkcheck.url.url_quote(node.url)))
|
||||
xmlquote(linkcheck.url.url_quote(node.url)))
|
||||
self.fd.write(" <data>\n")
|
||||
if node.dltime>=0 and self.has_field("dltime"):
|
||||
self.fd.write(" <dltime>%f</dltime>\n" % node.dltime)
|
||||
|
|
@ -85,7 +114,7 @@ class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
|
|||
self.fd.write(">\n")
|
||||
if self.has_field("url"):
|
||||
self.fd.write(" <label>%s</label>\n" % \
|
||||
linkcheck.XmlUtils.linkcheck.xmlquote(node.urlName))
|
||||
xmlquote(node.urlName))
|
||||
self.fd.write(" <data>\n")
|
||||
if self.has_field("result"):
|
||||
self.fd.write(" <valid>%d</valid>\n" % \
|
||||
|
|
|
|||
|
|
@ -16,60 +16,3 @@
|
|||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import time
|
||||
import linkcheck
|
||||
import linkcheck.i18n
|
||||
|
||||
|
||||
def strtime (t):
    """return ISO 8601 formatted time

    t is a timestamp in seconds since the epoch. The result is the local
    time as "YYYY-MM-DD HH:MM:SS" followed by the UTC offset that
    applies at that moment, for example "2004-05-01 12:00:00+0200".
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t)) + \
           strtimezone(t)


def strduration (duration):
    """return string formatted time duration

    duration is a number of seconds. Values above 60 are converted to
    minutes, and if still above 60 to hours. The result is the number
    with three decimals and the translated unit name, with a leading
    space.
    """
    name = linkcheck.i18n._("seconds")
    if duration > 60:
        duration = duration / 60
        name = linkcheck.i18n._("minutes")
    if duration > 60:
        duration = duration / 60
        name = linkcheck.i18n._("hours")
    return " %.3f %s"%(duration, name)


def strtimezone (t=None):
    """return timezone info, %z on some platforms, but not supported on all

    Returns the local UTC offset as sign, two hour digits and two minute
    digits (for example "+0100" or "-0530"). t is a timestamp in seconds
    since the epoch; the offset is the one in effect at that moment.
    With t omitted or None the current time is used.
    """
    # time.daylight only tells that the local zone defines a DST period;
    # whether DST applies at the given moment is reported by tm_isdst
    if time.daylight and time.localtime(t).tm_isdst > 0:
        zone = time.altzone
    else:
        zone = time.timezone
    # zone counts seconds west of UTC, the printed offset is east-positive
    offset = -zone
    if offset < 0:
        sign = "-"
        offset = -offset
    else:
        sign = "+"
    hours, minutes = divmod(offset // 60, 60)
    return "%s%02d%02d" % (sign, hours, minutes)
|
||||
|
||||
|
||||
import linkcheck.logger.StandardLogger
|
||||
import linkcheck.logger.HtmlLogger
|
||||
import linkcheck.logger.ColoredLogger
|
||||
import linkcheck.logger.GMLLogger
|
||||
import linkcheck.logger.SQLLogger
|
||||
import linkcheck.logger.CSVLogger
|
||||
import linkcheck.logger.BlacklistLogger
|
||||
import linkcheck.logger.XMLLogger
|
||||
import linkcheck.logger.NoneLogger
|
||||
|
||||
|
||||
# Default logger classes, keyed by the logger type name.
Loggers = {
    "text": linkcheck.logger.StandardLogger.StandardLogger,
    "html": linkcheck.logger.HtmlLogger.HtmlLogger,
    "colored": linkcheck.logger.ColoredLogger.ColoredLogger,
    "gml": linkcheck.logger.GMLLogger.GMLLogger,
    "sql": linkcheck.logger.SQLLogger.SQLLogger,
    "csv": linkcheck.logger.CSVLogger.CSVLogger,
    "blacklist": linkcheck.logger.BlacklistLogger.BlacklistLogger,
    "xml": linkcheck.logger.XMLLogger.XMLLogger,
    "none": linkcheck.logger.NoneLogger.NoneLogger,
}
# For easy printing: the logger type names as one comma separated string.
LoggerKeys = ", ".join(Loggers)
|
||||
|
|
|
|||
Loading…
Reference in a new issue