git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1355 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-07-07 18:01:25 +00:00
parent 5a644b35b3
commit 6f37e1961d
11 changed files with 1134 additions and 0 deletions

View file

@ -0,0 +1,77 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys
import os
import linkcheck
class BlacklistLogger(linkcheck.logger.Logger.Logger):
    """Updates a blacklist of wrong links.

    The blacklist maps the cache key of a URL to the number of runs in
    which the URL was logged as invalid. If a link on the blacklist is
    working (again), it is removed from the list. So after n runs (for
    example n days when run daily) an entry with count n failed in each
    of those runs.

    The base class is spelled module.Class, the form the other loggers
    of this package use (for example NoneLogger and StandardLogger).

    Keyword arguments read by the constructor:
      fileoutput -- if present, the blacklist is read from and written to
                    the file named by ``filename``
      filename   -- blacklist file name (required with ``fileoutput``)
      fd         -- already open output stream, used when ``fileoutput``
                    is absent; without both, sys.stdout is used
    """

    def __init__(self, **args):
        super(BlacklistLogger, self).__init__(**args)
        self.errors = 0
        self.blacklist = {}
        # Only a file opened here is closed by writeBlacklist(). Setting
        # the flag up front keeps it defined for the 'fd' case as well.
        self.fileoutput = False
        if 'fileoutput' in args:
            self.fileoutput = True
            filename = args['filename']
            if os.path.exists(filename):
                self.readBlacklist(open(filename, "r"))
            # The umask only matters at the moment the file is created,
            # so restrict it (octal 077: owner access only) around the
            # open call and restore it right afterwards.
            oldmask = os.umask(int("077", 8))
            try:
                self.fd = open(filename, "w")
            finally:
                os.umask(oldmask)
        elif 'fd' in args:
            self.fd = args['fd']
        else:
            self.fd = sys.stdout

    def newUrl(self, urlData):
        """Update the blacklist entry of a freshly checked URL.

        Cached results are skipped. A valid URL is removed from the
        blacklist, an invalid one gets its failure count increased
        (starting at 1).
        """
        if urlData.cached:
            return
        key = urlData.getCacheKey()
        if urlData.valid:
            if key in self.blacklist:
                del self.blacklist[key]
        else:
            self.blacklist[key] = self.blacklist.get(key, 0) + 1

    def endOfOutput(self, linknumber=-1):
        """Write the collected blacklist; ``linknumber`` is not used."""
        self.writeBlacklist()

    def readBlacklist(self, fd):
        """Fill the blacklist from the open stream ``fd`` and close it.

        Every line has the form "<count> <key>"; empty lines and lines
        starting with '#' are ignored. A malformed line raises ValueError.
        """
        try:
            for line in fd:
                line = line.rstrip()
                if not line or line.startswith('#'):
                    continue
                value, key = line.split(None, 1)
                self.blacklist[key] = int(value)
        finally:
            fd.close()

    def writeBlacklist(self):
        """Write the blacklist as "<count> <key>" lines.

        The output stream is closed afterwards if it is a file opened by
        the constructor.
        """
        try:
            for key, value in self.blacklist.items():
                self.fd.write("%d %s\n" % (value, key))
        finally:
            if self.fileoutput:
                self.fd.close()

View file

@ -0,0 +1,92 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import csv
import linkcheck
class CSVLogger(linkcheck.logger.StandardLogger.StandardLogger):
    """CSV output. CSV consists of one line per entry. The values of an
    entry are separated by the configured separator character
    (constructor argument ``separator``).
    """

    # column names in the order newUrl() writes them
    _columns = (
        "urlname", "recursionlevel", "parentname", "baseref",
        "errorstring", "validstring", "warningstring", "infostring",
        "valid", "url", "line", "column", "name", "dltime", "dlsize",
        "checktime", "cached",
    )

    def __init__(self, **args):
        super(CSVLogger, self).__init__(**args)
        self.separator = args['separator']
        self.lineterminator = "\n"

    def init(self):
        """Start logging: write the optional intro and set up the CSV
        writer. Does nothing more if there is no output stream."""
        # Call the base Logger directly (module.Class form, as the other
        # loggers of this package do) so that the text intro of
        # StandardLogger.init() is not printed.
        linkcheck.logger.Logger.Logger.init(self)
        if self.fd is None:
            return
        self.starttime = time.time()
        if self.has_field("intro"):
            self._write_intro()
        self.writer = csv.writer(self.fd, dialect='excel',
                                 delimiter=self.separator,
                                 lineterminator=self.lineterminator)

    def _write_intro(self):
        """Write the '#' comment header describing program and columns."""
        eol = self.lineterminator
        write = self.fd.write
        write("# "+(linkcheck.i18n._("created by %s at %s%s") %
                    (linkcheck.Config.AppName,
                     linkcheck.logger.strtime(self.starttime), eol)))
        write("# "+(linkcheck.i18n._("Get the newest version at %s%s") %
                    (linkcheck.Config.Url, eol)))
        write("# "+(linkcheck.i18n._("Write comments and bugs to %s%s%s") %
                    (linkcheck.Config.Email, eol, eol)))
        lines = [linkcheck.i18n._("# Format of the entries:")]
        for column in self._columns:
            lines.append("# %s;" % column)
        write(eol.join(lines)+eol)
        self.flush()

    def newUrl(self, urlData):
        """Write one CSV row with the data of the checked URL."""
        if self.fd is None:
            return
        quote = linkcheck.url.url_quote
        row = [
            urlData.urlName,
            urlData.recursionLevel,
            quote(urlData.parentName or ""),
            urlData.baseRef,
            urlData.errorString,
            urlData.validString,
            urlData.warningString,
            urlData.infoString,
            urlData.valid,
            quote(urlData.url),
            urlData.line,
            urlData.column,
            urlData.name,
            urlData.dltime,
            urlData.dlsize,
            urlData.checktime,
            urlData.cached,
        ]
        self.writer.writerow(row)
        self.flush()

    def endOfOutput(self, linknumber=-1):
        """Write the optional outro comment and close the output stream.
        ``linknumber`` is not used."""
        if self.fd is None:
            return
        self.stoptime = time.time()
        if self.has_field("outro"):
            duration = self.stoptime - self.starttime
            self.fd.write(
                "# "+linkcheck.i18n._("Stopped checking at %s (%s)%s") %
                (linkcheck.logger.strtime(self.stoptime),
                 linkcheck.logger.strduration(duration),
                 self.lineterminator))
        self.flush()
        self.fd.close()
        self.fd = None

View file

@ -0,0 +1,154 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck
class ColoredLogger(linkcheck.logger.StandardLogger.StandardLogger):
    """ANSI colorized output.

    URLs are grouped below their parent URL and drawn as a small tree:
    while a parent URL is being listed, ``self.prefix`` is true and every
    output line of a child is prefixed with a tree marker.

    The base class is spelled module.Class, the form the other loggers
    of this package use.
    """

    # constructor arguments holding color names; each one is converted
    # with esc_ansicolor() and stored under the same attribute name
    _color_args = (
        'colorparent', 'colorurl', 'colorname', 'colorreal', 'colorbase',
        'colorvalid', 'colorinvalid', 'colorinfo', 'colorwarning',
        'colordltime', 'colordlsize', 'colorreset',
    )

    def __init__(self, **args):
        super(ColoredLogger, self).__init__(**args)
        for name in self._color_args:
            setattr(self, name,
                    linkcheck.AnsiColor.esc_ansicolor(args[name]))
        # parent URL whose children are currently listed, if any
        self.currentPage = None
        # true while output lines belong to a parent URL group
        self.prefix = 0

    def _write_prefix(self):
        """Write the tree marker if we are inside a parent URL group."""
        if self.prefix:
            self.fd.write("| ")

    def _write_colored(self, name, color, text):
        """Write one log line: field name, padding and colored text."""
        self._write_prefix()
        self.fd.write(self.field(name)+self.spaces(name)+
                      color+text+self.colorreset+"\n")

    def _write_parent(self, urlData):
        """Open a new parent URL group or close the current one."""
        if urlData.parentName:
            if self.currentPage != urlData.parentName:
                if self.prefix:
                    # close the previous group
                    self.fd.write("o\n")
                self.fd.write(
                    "\n"+self.field("parenturl")+self.spaces("parenturl")+
                    self.colorparent+
                    linkcheck.url.url_quote(urlData.parentName or "")+
                    self.colorreset+"\n")
                self.currentPage = urlData.parentName
                self.prefix = 1
        else:
            if self.prefix:
                self.fd.write("o\n")
            self.prefix = 0
            self.currentPage = None

    def _write_url(self, urlData):
        """Write the URL line including line, column and cache hints."""
        if self.prefix:
            self.fd.write("|\n+- ")
        else:
            self.fd.write("\n")
        self.fd.write(self.field("url")+self.spaces("url")+self.colorurl+
                      urlData.urlName+self.colorreset)
        if urlData.line:
            self.fd.write(linkcheck.i18n._(", line %d") % urlData.line)
        if urlData.column:
            self.fd.write(linkcheck.i18n._(", col %d") % urlData.column)
        if urlData.cached:
            self.fd.write(linkcheck.i18n._(" (cached)\n"))
        else:
            self.fd.write("\n")

    def _write_info(self, urlData):
        """Write the info text as an indented block."""
        if self.prefix:
            lead = "| "
        else:
            lead = ""
        # continuation lines are indented below the first line; outside
        # of a group a single space stands in for the tree marker
        block = linkcheck.StringUtil.indentWith(
            linkcheck.StringUtil.blocktext(urlData.infoString, 65),
            (lead or " ")+self.spaces("info"))
        self.fd.write(lead+self.field("info")+self.spaces("info")+block)
        self.fd.write(self.colorreset+"\n")

    def newUrl(self, urlData):
        """Log the data of one checked URL and count it if invalid."""
        if self.fd is None:
            return
        seconds = linkcheck.i18n._("%.3f seconds")
        if self.has_field("parenturl"):
            self._write_parent(urlData)
        if self.has_field("url"):
            self._write_url(urlData)
        if urlData.name and self.has_field("name"):
            self._write_colored("name", self.colorname, urlData.name)
        if urlData.baseRef and self.has_field("base"):
            self._write_colored("base", self.colorbase, urlData.baseRef)
        if urlData.url and self.has_field("realurl"):
            self._write_colored("realurl", self.colorreal,
                                linkcheck.url.url_quote(urlData.url))
        if urlData.dltime >= 0 and self.has_field("dltime"):
            self._write_colored("dltime", self.colordltime,
                                seconds % urlData.dltime)
        if urlData.dlsize >= 0 and self.has_field("dlsize"):
            self._write_colored("dlsize", self.colordlsize,
                                linkcheck.StringUtil.strsize(urlData.dlsize))
        if urlData.checktime and self.has_field("checktime"):
            # the check time shares the download time color
            self._write_colored("checktime", self.colordltime,
                                seconds % urlData.checktime)
        if urlData.infoString and self.has_field("info"):
            self._write_info(urlData)
        if urlData.warningString and self.has_field("warning"):
            self._write_colored("warning", self.colorwarning,
                                urlData.warningString)
        if not urlData.valid:
            # count errors even if the result field is not logged, so
            # the summary in the outro stays correct
            self.errors += 1
        if self.has_field("result"):
            if urlData.valid:
                self._write_colored("result", self.colorvalid,
                                    urlData.validString)
            else:
                self._write_colored("result", self.colorinvalid,
                                    urlData.errorString)
        self.flush()

    def endOfOutput(self, linknumber=-1):
        """Close an open parent URL group, then let StandardLogger
        write the outro."""
        if self.fd is None:
            return
        if self.has_field("outro") and self.prefix:
            self.fd.write("o\n")
        super(ColoredLogger, self).endOfOutput(linknumber=linknumber)

View file

@ -0,0 +1,98 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
class GMLLogger(linkcheck.logger.StandardLogger.StandardLogger):
    """GML means Graph Modeling Language. Use a GML tool to see
    your sitemap graph.

    Every checked URL becomes a node as soon as it is logged. The edges
    (parent URL -> URL) are written once at the end of the output, when
    all nodes are known.
    """

    def __init__(self, **args):
        super(GMLLogger, self).__init__(**args)
        # maps the URL of a node to its urlData object
        self.nodes = {}
        # id for the next node
        self.nodeid = 0

    def init(self):
        """Write the optional intro comments and open the graph."""
        linkcheck.logger.Logger.Logger.init(self)
        if self.fd is None:
            return
        self.starttime = time.time()
        if self.has_field("intro"):
            self.fd.write("# "+(linkcheck.i18n._("created by %s at %s\n") %
                                (linkcheck.Config.AppName,
                                 linkcheck.logger.strtime(self.starttime))))
            self.fd.write(
                "# "+(linkcheck.i18n._("Get the newest version at %s\n") %
                      linkcheck.Config.Url))
            self.fd.write(
                "# "+(linkcheck.i18n._("Write comments and bugs to %s\n\n") %
                      linkcheck.Config.Email))
        self.fd.write("graph [\n directed 1\n")
        self.flush()

    def newUrl(self, urlData):
        """Write one node for a URL that was not seen before.

        The node id is stored on ``urlData`` itself as attribute ``id``.
        """
        if self.fd is None:
            return
        node = urlData
        if not node.url or node.url in self.nodes:
            return
        node.id = self.nodeid
        self.nodes[node.url] = node
        self.nodeid += 1
        write = self.fd.write
        write(" node [\n")
        write(" id %d\n" % node.id)
        if self.has_field("realurl"):
            write(' label "%s"\n' % linkcheck.url.url_quote(node.url))
        if node.dltime >= 0 and self.has_field("dltime"):
            write(" dltime %d\n" % node.dltime)
        if node.dlsize >= 0 and self.has_field("dlsize"):
            write(" dlsize %d\n" % node.dlsize)
        if node.checktime and self.has_field("checktime"):
            write(" checktime %d\n" % node.checktime)
        if self.has_field("extern"):
            write(" extern %d\n" % (node.extern and 1 or 0))
        write(" ]\n")
        self.flush()

    def writeEdges(self):
        """Write an edge for every known node whose parent URL is also a
        known node.

        This walks all nodes, so it is called only once, from
        endOfOutput(); calling it for every new node would write the
        already known edges again and again.
        """
        write = self.fd.write
        for node in self.nodes.values():
            if node.parentName not in self.nodes:
                continue
            write(" edge [\n")
            write(' label "%s"\n' % node.urlName)
            if self.has_field("parenturl"):
                write(" source %d\n" % self.nodes[node.parentName].id)
            write(" target %d\n" % node.id)
            if self.has_field("result"):
                write(" valid %d\n" % (node.valid and 1 or 0))
            write(" ]\n")
        self.flush()

    def endOfOutput(self, linknumber=-1):
        """Write all edges, close the graph and write the optional outro.
        ``linknumber`` is not used."""
        if self.fd is None:
            return
        self.writeEdges()
        self.fd.write("]\n")
        if self.has_field("outro"):
            self.stoptime = time.time()
            duration = self.stoptime - self.starttime
            self.fd.write(
                "# "+linkcheck.i18n._("Stopped checking at %s (%s)\n") %
                (linkcheck.logger.strtime(self.stoptime),
                 linkcheck.logger.strduration(duration)))
        self.flush()
        self.fd = None

View file

@ -0,0 +1,172 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
# Template for the start of the HTML report (doctype, head with inline
# style sheet, opening body tag). It takes five %-format values in this
# order: page title, background color, link color, visited link color and
# active link color.
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD html 4.01//EN">
<html><head><title>%s</title>
<style type="text/css">\n<!--
h2 { font-family: Verdana,sans-serif; font-size: 22pt;
font-style: bold; font-weight: bold }
body { font-family: Arial,sans-serif; font-size: 11pt }
td { font-family: Arial,sans-serif; font-size: 11pt }
code { font-family: Courier }
a:hover { color: #34a4ef }
//-->
</style></head>
<body bgcolor="%s" link="%s" vlink="%s" alink="%s">
"""
class HtmlLogger(linkcheck.logger.StandardLogger.StandardLogger):
    """Logger with HTML output.

    Every checked URL is written as a small table. Text that comes from
    the checked documents (URL names, link names, base references,
    warning and result strings) is HTML-escaped before it is written.
    """

    def __init__(self, **args):
        super(HtmlLogger, self).__init__(**args)
        self.colorbackground = args['colorbackground']
        self.colorurl = args['colorurl']
        self.colorborder = args['colorborder']
        self.colorlink = args['colorlink']
        # the three table* values are written verbatim where a table
        # cell starts and are followed by the cell content and "</td>"
        self.tablewarning = args['tablewarning']
        self.tableerror = args['tableerror']
        self.tableok = args['tableok']

    def init(self):
        """Write the HTML header and the optional intro."""
        linkcheck.logger.Logger.Logger.init(self)
        if self.fd is None:
            return
        self.starttime = time.time()
        self.fd.write(HTML_HEADER % (linkcheck.Config.App,
                                     self.colorbackground, self.colorlink,
                                     self.colorlink, self.colorlink))
        if self.has_field('intro'):
            self.fd.write(
                "<center><h2>"+linkcheck.Config.App+"</h2></center>"+
                "<br><blockquote>"+linkcheck.Config.Freeware+"<br><br>"+
                (linkcheck.i18n._("Start checking at %s\n") %
                 linkcheck.logger.strtime(self.starttime))+
                "<br>")
            self.flush()

    def _escape(self, text):
        """Return text with &, <, > and double quotes HTML-escaped."""
        # imported here so this module needs no additional top-level import
        import cgi
        return cgi.escape(text, 1)

    def _write_row(self, name, value):
        """Write a table row with the field name and an HTML value."""
        self.fd.write("<tr>\n<td>"+self.field(name)+"</td>\n<td>"+
                      value+"</td>\n</tr>\n")

    def _write_status_row(self, cell, name, value):
        """Write a table row whose two cells start with ``cell``."""
        self.fd.write("<tr>\n"+cell+self.field(name)+"</td>\n"+
                      cell+value+"</td>\n</tr>\n")

    def _link(self, url):
        """Return an HTML anchor pointing to and displaying url."""
        quoted = self._escape(linkcheck.url.url_quote(url))
        return '<a target="top" href="'+quoted+'">'+quoted+"</a>"

    def newUrl(self, urlData):
        """Write the table for one checked URL and count it if invalid."""
        if self.fd is None:
            return
        write = self.fd.write
        seconds = linkcheck.i18n._("%.3f seconds")
        write("<br clear=\"all\"><br>\n"+
              "<table align=\"left\" border=\"0\" cellspacing=\"0\" cellpadding=\"1\"\n"+
              " bgcolor=\""+self.colorborder+"\" summary=\"Border\">\n"+
              "<tr>\n"+
              "<td>\n"+
              "<table align=\"left\" border=\"0\" cellspacing=\"0\" cellpadding=\"3\"\n"+
              " summary=\"checked link\" bgcolor=\""+self.colorbackground+"\">\n")
        if self.has_field("url"):
            write("<tr>\n"+
                  "<td bgcolor=\""+self.colorurl+"\">"+self.field("url")+"</td>\n"+
                  "<td bgcolor=\""+self.colorurl+"\">"+
                  self._escape(urlData.urlName))
            if urlData.cached:
                write(linkcheck.i18n._(" (cached)"))
            write("</td>\n</tr>\n")
        if urlData.name and self.has_field("name"):
            self._write_row("name", self._escape(urlData.name))
        if urlData.parentName and self.has_field("parenturl"):
            write("<tr>\n<td>"+self.field("parenturl")+"</td>\n<td>"+
                  self._link(urlData.parentName or ""))
            if urlData.line:
                write(linkcheck.i18n._(", line %d") % urlData.line)
            if urlData.column:
                write(linkcheck.i18n._(", col %d") % urlData.column)
            write("</td>\n</tr>\n")
        if urlData.baseRef and self.has_field("base"):
            self._write_row("base", self._escape(urlData.baseRef))
        if urlData.url and self.has_field("realurl"):
            self._write_row("realurl", self._link(urlData.url))
        if urlData.dltime >= 0 and self.has_field("dltime"):
            self._write_row("dltime", seconds % urlData.dltime)
        if urlData.dlsize >= 0 and self.has_field("dlsize"):
            self._write_row("dlsize",
                            linkcheck.StringUtil.strsize(urlData.dlsize))
        if urlData.checktime and self.has_field("checktime"):
            self._write_row("checktime", seconds % urlData.checktime)
        if urlData.infoString and self.has_field("info"):
            self._write_row("info",
                            linkcheck.StringUtil.htmlify(urlData.infoString))
        if urlData.warningString and self.has_field("warning"):
            # escape first, then turn line breaks into markup
            warning = self._escape(urlData.warningString)
            self._write_status_row(self.tablewarning, "warning",
                                   warning.replace("\n", "<br>"))
        if not urlData.valid:
            # count errors even if the result field is not logged, so
            # the summary in the outro stays correct
            self.errors += 1
        if self.has_field("result"):
            if urlData.valid:
                self._write_status_row(self.tableok, "result",
                                       self._escape(urlData.validString))
            else:
                self._write_status_row(self.tableerror, "result",
                                       self._escape(urlData.errorString))
        write("</table></td></tr></table><br clear=\"all\">")
        self.flush()

    def _write_outro(self, linknumber):
        """Write the error summary, the stop time and the program info."""
        write = self.fd.write
        write("\n"+linkcheck.i18n._("Thats it. "))
        if self.errors == 1:
            write(linkcheck.i18n._("1 error"))
        else:
            write(str(self.errors)+linkcheck.i18n._(" errors"))
        if linknumber >= 0:
            if linknumber == 1:
                write(linkcheck.i18n._(" in 1 link"))
            else:
                write(linkcheck.i18n._(" in %d links") % linknumber)
        write(linkcheck.i18n._(" found")+"\n<br>")
        self.stoptime = time.time()
        duration = self.stoptime - self.starttime
        write(linkcheck.i18n._("Stopped checking at %s (%s)\n") %
              (linkcheck.logger.strtime(self.stoptime),
               linkcheck.logger.strduration(duration)))
        write("</blockquote><br><hr noshade size=\"1\"><small>"+
              linkcheck.Config.HtmlAppInfo+"<br>")
        write(linkcheck.i18n._("Get the newest version at %s\n") %
              ('<a href="'+linkcheck.Config.Url+'" target="_top">'+
               linkcheck.Config.Url+"</a>.<br>"))
        write(linkcheck.i18n._("Write comments and bugs to %s\n\n") %
              ('<a href="mailto:'+linkcheck.Config.Email+'">'+
               linkcheck.Config.Email+"</a>."))
        write("</small>")

    def endOfOutput(self, linknumber=-1):
        """Write the optional outro and finish the HTML document.

        linknumber -- number of checked links for the summary; a
                      negative value omits that part
        """
        if self.fd is None:
            return
        if self.has_field("outro"):
            self._write_outro(linknumber)
        # init() always opens the document, so always close it
        self.fd.write("</body></html>")
        self.flush()
        self.fd = None

View file

@ -0,0 +1,81 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck
class Logger(object):
    """Base class of all loggers.

    It keeps the set of fields to log and the translated field names.
    Subclasses have to implement newUrl() and endOfOutput().
    """

    # maps field names to their translated, printable names
    Fields = {
        "realurl": linkcheck.i18n._("Real URL"),
        "result": linkcheck.i18n._("Result"),
        "base": linkcheck.i18n._("Base"),
        "name": linkcheck.i18n._("Name"),
        "parenturl": linkcheck.i18n._("Parent URL"),
        "extern": linkcheck.i18n._("Extern"),
        "info": linkcheck.i18n._("Info"),
        "warning": linkcheck.i18n._("Warning"),
        "dltime": linkcheck.i18n._("D/L Time"),
        "dlsize": linkcheck.i18n._("D/L Size"),
        "checktime": linkcheck.i18n._("Check Time"),
        "url": linkcheck.i18n._("URL"),
    }

    def __init__(self, **args):
        """Read the optional ``fields`` argument, a sequence of field
        names to log. Without it, or if it contains "all", every field
        is logged."""
        self.logfields = None # log all fields
        if 'fields' in args and "all" not in args['fields']:
            self.logfields = args['fields']

    def has_field(self, name):
        """Return true if the field with the given name is logged."""
        if self.logfields is None:
            # log all fields
            return True
        return name in self.logfields

    def field(self, name):
        """Return the translated field name; an unknown name raises
        KeyError."""
        # XXX i18nreal._(self.Fields[name])
        return self.Fields[name]

    def spaces(self, name):
        """Return the padding between field name and value; only valid
        after init() has been called."""
        return self.logspaces[name]

    def init(self):
        """Compute the padding that aligns the values of all logged
        fields in one column."""
        if self.logfields is None:
            fields = list(self.Fields.keys())
        else:
            fields = self.logfields
        # The configured fields may contain names that are only tested
        # with has_field() (the loggers test "intro" and "outro") and
        # have no printable name; treat their name as empty instead of
        # failing with a KeyError.
        labels = {}
        for key in fields:
            labels[key] = self.Fields.get(key, "")
        # maximum indent for localized log field names; the extra zero
        # keeps max() working when no field is configured at all
        widths = [len(label) for label in labels.values()]
        self.max_indent = max(widths+[0])+1
        # map with spaces between field name and value
        self.logspaces = {}
        for key, label in labels.items():
            self.logspaces[key] = " "*(self.max_indent - len(label))

    def newUrl(self, urlData):
        """Log one checked URL; has to be implemented by subclasses."""
        raise NotImplementedError("abstract function")

    def endOfOutput(self, linknumber=-1):
        """Finish the output; has to be implemented by subclasses."""
        raise NotImplementedError("abstract function")

    def __str__(self):
        return self.__class__.__name__

    def __repr__(self):
        return repr(self.__class__.__name__)

View file

@ -0,0 +1,28 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck
class NoneLogger(linkcheck.logger.Logger.Logger):
    """Dummy logger printing nothing."""

    def newUrl(self, urlData):
        """Ignore the checked URL."""
        return None

    def endOfOutput(self, linknumber=-1):
        """Do nothing; no output was produced that needs finishing."""
        return None

View file

@ -0,0 +1,79 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
class SQLLogger(linkcheck.logger.StandardLogger.StandardLogger):
    """SQL output for PostgreSQL, not tested.

    Every checked URL is written as one insert statement. The
    constructor argument ``dbname`` is used as the target name of the
    insert and ``separator`` is appended to every statement.
    """

    def __init__(self, **args):
        super(SQLLogger, self).__init__(**args)
        self.dbname = args['dbname']
        self.separator = args['separator']

    def init(self):
        """Write the optional intro as SQL comments."""
        linkcheck.logger.Logger.Logger.init(self)
        if self.fd is None:
            return
        self.starttime = time.time()
        if not self.has_field("intro"):
            return
        translate = linkcheck.i18n._
        started = linkcheck.logger.strtime(self.starttime)
        self.fd.write("-- "+(translate("created by %s at %s\n") %
                             (linkcheck.Config.AppName, started)))
        self.fd.write("-- "+(translate("Get the newest version at %s\n") %
                             linkcheck.Config.Url))
        self.fd.write("-- "+(translate("Write comments and bugs to %s\n\n") %
                             linkcheck.Config.Email))
        self.flush()

    def newUrl(self, urlData):
        """Write the insert statement for one checked URL."""
        if self.fd is None:
            return
        sqlify = linkcheck.StringUtil.sqlify
        quote = linkcheck.url.url_quote
        statement = ("insert into %s(urlname,recursionlevel,parentname,"
            "baseref,errorstring,validstring,warningstring,infostring,"
            "valid,url,line,col,name,checktime,dltime,dlsize,cached)"
            " values "
            "(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%s,%d,%d,%d,%d)%s\n")
        # the values in the order of the placeholders above
        values = (
            self.dbname,
            sqlify(urlData.urlName),
            urlData.recursionLevel,
            sqlify(quote(urlData.parentName or "")),
            sqlify(urlData.baseRef),
            sqlify(urlData.errorString),
            sqlify(urlData.validString),
            sqlify(urlData.warningString),
            sqlify(urlData.infoString),
            urlData.valid,
            sqlify(quote(urlData.url)),
            urlData.line,
            urlData.column,
            sqlify(urlData.name),
            urlData.checktime,
            urlData.dltime,
            urlData.dlsize,
            urlData.cached,
            self.separator,
        )
        self.fd.write(statement % values)
        self.flush()

    def endOfOutput(self, linknumber=-1):
        """Write the optional outro as SQL comment and drop the output
        stream. ``linknumber`` is not used."""
        if self.fd is None:
            return
        if self.has_field("outro"):
            self.stoptime = time.time()
            elapsed = self.stoptime - self.starttime
            stopped = linkcheck.logger.strtime(self.stoptime)
            self.fd.write(
                "-- "+linkcheck.i18n._("Stopped checking at %s (%s)\n") %
                (stopped, linkcheck.logger.strduration(elapsed)))
        self.flush()
        self.fd = None

View file

@ -0,0 +1,170 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys
import time
import linkcheck
import linkcheck.logger.Logger
class StandardLogger (linkcheck.logger.Logger.Logger):
"""Standard text logger.
Every Logger has to implement the following functions:
init(self)
Called once to initialize the Logger. Why do we not use __init__(self)?
Because we initialize the start time in init and __init__ gets not
called at the time the checking starts but when the logger object is
created.
Another reason is that we dont want might create several loggers
as a default and then switch to another configured output. So we
must not print anything out at __init__ time.
newUrl(self,urlData)
Called every time an url finished checking. All data we checked is in
the UrlData object urlData.
endOfOutput(self)
Called at the end of checking to close filehandles and such.
Passing parameters to the constructor:
__init__(self, **args)
The args dictionary is filled in Config.py. There you can specify
default parameters. Adjust these parameters in the configuration
files in the appropriate logger section.
Informal text output format spec:
Output consists of a set of URL logs separated by one or more
blank lines.
A URL log consists of two or more lines. Each line consists of
keyword and data, separated by whitespace.
Unknown keywords will be ignored.
"""
def __init__ (self, **args):
super(StandardLogger, self).__init__(**args)
self.errors = 0
#self.warnings = 0
if args.has_key('fileoutput'):
self.fd = file(args['filename'], "w")
elif args.has_key('fd'):
self.fd = args['fd']
else:
self.fd = sys.stdout
def init (self):
super(StandardLogger, self).init()
if self.fd is None:
return
self.starttime = time.time()
if self.has_field('intro'):
self.fd.write("%s\n%s\n" % (linkcheck.Config.AppInfo, linkcheck.Config.Freeware))
self.fd.write(linkcheck.i18n._("Get the newest version at %s\n") % linkcheck.Config.Url)
self.fd.write(linkcheck.i18n._("Write comments and bugs to %s\n\n") % linkcheck.Config.Email)
self.fd.write(linkcheck.i18n._("Start checking at %s\n") % linkcheck.logger.strtime(self.starttime))
self.flush()
def newUrl (self, urlData):
if self.fd is None:
return
if self.has_field('url'):
self.fd.write("\n"+self.field('url')+self.spaces('url')+
urlData.urlName)
if urlData.cached:
self.fd.write(linkcheck.i18n._(" (cached)\n"))
else:
self.fd.write("\n")
if urlData.name and self.has_field('name'):
self.fd.write(self.field("name")+self.spaces("name")+
urlData.name+"\n")
if urlData.parentName and self.has_field('parenturl'):
self.fd.write(self.field('parenturl')+self.spaces("parenturl")+
linkcheck.url.url_quote(urlData.parentName or "")+
(linkcheck.i18n._(", line %d")%urlData.line)+
(linkcheck.i18n._(", col %d")%urlData.column)+"\n")
if urlData.baseRef and self.has_field('base'):
self.fd.write(self.field("base")+self.spaces("base")+
urlData.baseRef+"\n")
if urlData.url and self.has_field('realurl'):
self.fd.write(self.field("realurl")+self.spaces("realurl")+
linkcheck.url.url_quote(urlData.url)+"\n")
if urlData.dltime>=0 and self.has_field('dltime'):
self.fd.write(self.field("dltime")+self.spaces("dltime")+
linkcheck.i18n._("%.3f seconds\n") % urlData.dltime)
if urlData.dlsize>=0 and self.has_field('dlsize'):
self.fd.write(self.field("dlsize")+self.spaces("dlsize")+
"%s\n"%linkcheck.StringUtil.strsize(urlData.dlsize))
if urlData.checktime and self.has_field('checktime'):
self.fd.write(self.field("checktime")+self.spaces("checktime")+
linkcheck.i18n._("%.3f seconds\n") % urlData.checktime)
if urlData.infoString and self.has_field('info'):
self.fd.write(self.field("info")+self.spaces("info")+
linkcheck.StringUtil.indent(
linkcheck.StringUtil.blocktext(urlData.infoString, 65),
self.max_indent)+"\n")
if urlData.warningString:
#self.warnings += 1
if self.has_field('warning'):
self.fd.write(self.field("warning")+self.spaces("warning")+
linkcheck.StringUtil.indent(
linkcheck.StringUtil.blocktext(urlData.warningString, 65),
self.max_indent)+"\n")
if self.has_field('result'):
self.fd.write(self.field("result")+self.spaces("result"))
if urlData.valid:
self.fd.write(urlData.validString+"\n")
else:
self.errors += 1
self.fd.write(urlData.errorString+"\n")
self.flush()
def endOfOutput (self, linknumber=-1):
    """Write the closing summary and detach from the output stream.

    Nothing happens when no output stream is set. When the 'outro' field
    is enabled, the number of errors, the number of links (only if
    linknumber is not negative) and the stop time together with the
    elapsed duration are written. Afterwards the stream is flushed and
    self.fd is reset to None.
    """
    if self.fd is None:
        return
    if self.has_field('outro'):
        _ = linkcheck.i18n._
        emit = self.fd.write
        emit(_("\nThats it. "))
        # error count, with a dedicated singular message
        if self.errors != 1:
            emit(str(self.errors) + _(" errors"))
        else:
            emit(_("1 error"))
        # link count is only reported for non-negative numbers
        if linknumber == 1:
            emit(_(" in 1 link"))
        elif linknumber >= 0:
            emit(_(" in %d links") % linknumber)
        emit(_(" found\n"))
        self.stoptime = time.time()
        elapsed = self.stoptime - self.starttime
        stamp = linkcheck.logger.strtime(self.stoptime)
        took = linkcheck.logger.strduration(elapsed)
        emit(_("Stopped checking at %s (%s)\n") % (stamp, took))
    self.flush()
    self.fd = None
def flush (self):
    """Flush the output stream if one is set.

    An IOError raised while flushing is ignored on purpose, since we are
    not responsible for proper flushing of log output streams.
    """
    stream = self.fd
    if not stream:
        return
    try:
        stream.flush()
    except IOError:
        pass

View file

@ -0,0 +1,109 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
    """XML output mirroring the GML structure. Easy to parse with any XML
    tool.

    Each URL that is seen for the first time is written as a <node>
    element; a parent/child relation between two known nodes is written
    as an <edge> element. Everything is wrapped in <GraphXML><graph>.
    """

    def __init__ (self, **args):
        """Initialize the logger; all keyword arguments are handed to the
        parent class unchanged."""
        super(XMLLogger, self).__init__(**args)
        # url -> url data object of every node written so far
        self.nodes = {}
        # id assigned to the next new node
        self.nodeid = 0
        # node id -> True for every node whose edge was already written,
        # so that writeEdges() does not emit the same edge again
        self.written_edges = {}

    def init (self):
        """Write the XML declaration, the optional intro comment and the
        opening graph tags, and remember the start time."""
        # the base Logger.init is called directly, not the one of the
        # immediate parent class
        linkcheck.logger.Logger.Logger.init(self)
        if self.fd is None:
            return
        self.starttime = time.time()
        self.fd.write('<?xml version="1.0"?>\n')
        if self.has_field("intro"):
            self.fd.write("<!--\n")
            self.fd.write(" "+linkcheck.i18n._("created by %s at %s\n") % \
                (linkcheck.Config.AppName,
                 linkcheck.logger.strtime(self.starttime)))
            self.fd.write(" "+linkcheck.i18n._(
                "Get the newest version at %s\n") % linkcheck.Config.Url)
            self.fd.write(" "+linkcheck.i18n._(
                "Write comments and bugs to %s\n\n") % \
                linkcheck.Config.Email)
            self.fd.write("-->\n\n")
        self.fd.write('<GraphXML>\n<graph isDirected="true">\n')
        self.flush()

    def newUrl (self, urlData):
        """Write one node for urlData if its URL is new, then write all
        edges that became available.

        urlData gets a numeric ``id`` attribute assigned when it is
        registered as a node.
        """
        if self.fd is None:
            return
        node = urlData
        if node.url and node.url not in self.nodes:
            node.id = self.nodeid
            self.nodes[node.url] = node
            self.nodeid += 1
            self.fd.write(' <node name="%d" ' % node.id)
            self.fd.write(">\n")
            if self.has_field("realurl"):
                self.fd.write(" <label>%s</label>\n" % \
                    linkcheck.XmlUtils.xmlquote(
                        linkcheck.url.url_quote(node.url)))
            self.fd.write(" <data>\n")
            if node.dltime>=0 and self.has_field("dltime"):
                self.fd.write(" <dltime>%f</dltime>\n" % node.dltime)
            if node.dlsize>=0 and self.has_field("dlsize"):
                self.fd.write(" <dlsize>%d</dlsize>\n" % node.dlsize)
            if node.checktime and self.has_field("checktime"):
                self.fd.write(" <checktime>%f</checktime>\n" \
                    % node.checktime)
            if self.has_field("extern"):
                self.fd.write(" <extern>%d</extern>\n" % \
                    int(bool(node.extern)))
            self.fd.write(" </data>\n")
            self.fd.write(" </node>\n")
        self.writeEdges()

    def writeEdges (self):
        """Write every edge that can be found in the graph and has not
        been written yet.

        The search is brute-force over all known nodes. Better would be a
        mapping of parent urls.
        """
        for node in self.nodes.values():
            if node.id in self.written_edges:
                # this method runs after every new URL; without this
                # check each edge would be written again on every call
                continue
            if node.parentName not in self.nodes:
                # parent is not (yet) a known node; retry on a later call
                continue
            self.written_edges[node.id] = True
            self.fd.write(" <edge")
            self.fd.write(' source="%d"' % \
                self.nodes[node.parentName].id)
            self.fd.write(' target="%d"' % node.id)
            self.fd.write(">\n")
            if self.has_field("url"):
                # use the same quoting helper as for the node label
                self.fd.write(" <label>%s</label>\n" % \
                    linkcheck.XmlUtils.xmlquote(node.urlName))
            self.fd.write(" <data>\n")
            if self.has_field("result"):
                self.fd.write(" <valid>%d</valid>\n" % \
                    int(bool(node.valid)))
            self.fd.write(" </data>\n")
            self.fd.write(" </edge>\n")
        self.flush()

    def endOfOutput (self, linknumber=-1):
        """Close the graph elements, write the optional outro comment and
        detach from the output stream.

        linknumber is accepted for interface compatibility and not used
        here.
        """
        if self.fd is None:
            return
        self.fd.write("</graph>\n</GraphXML>\n")
        if self.has_field("outro"):
            self.stoptime = time.time()
            duration = self.stoptime - self.starttime
            self.fd.write("<!-- ")
            self.fd.write(linkcheck.i18n._("Stopped checking at %s (%s)\n")%\
                (linkcheck.logger.strtime(self.stoptime),
                 linkcheck.logger.strduration(duration)))
            self.fd.write("-->")
        self.flush()
        self.fd = None

View file

@ -0,0 +1,74 @@
# -*- coding: iso-8859-1 -*-
"""Output logging support for different formats"""
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
def strtime (t):
    """Format the timestamp t (seconds since the epoch) as local
    "YYYY-MM-DD HH:MM:SS" followed by the timezone info."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
    return stamp + strtimezone()
def strduration (duration):
    """Return a duration given in seconds as formatted string.

    Values above 60 seconds are shown in minutes, values above 60 minutes
    in hours. The result has a leading space and three decimal places.
    """
    unit = linkcheck.i18n._("seconds")
    if duration > 60:
        duration /= 60
        unit = linkcheck.i18n._("minutes")
        # hours can only be reached after the minutes conversion
        if duration > 60:
            duration /= 60
            unit = linkcheck.i18n._("hours")
    return " %.3f %s" % (duration, unit)
def strtimezone ():
    """Return the current local UTC offset as "+HHMM" or "-HHMM".

    This is the information %z prints on some platforms, computed by hand
    because %z is not supported on all of them. The daylight saving
    offset (time.altzone) is only used while daylight saving time is
    actually in effect, otherwise time.timezone is used.
    """
    # time.daylight only tells that the zone defines a DST rule, not that
    # DST is active right now; the current tm_isdst flag decides that.
    if time.daylight and time.localtime().tm_isdst > 0:
        zone = time.altzone
    else:
        zone = time.timezone
    # timezone/altzone are seconds west of UTC, the printed offset counts
    # east of UTC
    offset = -zone
    sign = "+"
    if offset < 0:
        sign = "-"
        offset = -offset
    hours, rest = divmod(offset, 3600)
    # keep the minutes so that zones like +0530 are not truncated
    return "%s%02d%02d" % (sign, hours, rest // 60)
import linkcheck.logger.StandardLogger
import linkcheck.logger.HtmlLogger
import linkcheck.logger.ColoredLogger
import linkcheck.logger.GMLLogger
import linkcheck.logger.SQLLogger
import linkcheck.logger.CSVLogger
import linkcheck.logger.BlacklistLogger
import linkcheck.logger.XMLLogger
import linkcheck.logger.NoneLogger
# default logger classes: maps each logger name to the class implementing
# that output format
Loggers = {
    "text": linkcheck.logger.StandardLogger.StandardLogger,
    "html": linkcheck.logger.HtmlLogger.HtmlLogger,
    "colored": linkcheck.logger.ColoredLogger.ColoredLogger,
    "gml": linkcheck.logger.GMLLogger.GMLLogger,
    "sql": linkcheck.logger.SQLLogger.SQLLogger,
    "csv": linkcheck.logger.CSVLogger.CSVLogger,
    "blacklist": linkcheck.logger.BlacklistLogger.BlacklistLogger,
    "xml": linkcheck.logger.XMLLogger.XMLLogger,
    "none": linkcheck.logger.NoneLogger.NoneLogger,
}
# for easy printing: a comma separated logger list
# (the names appear in the dictionary's own key order)
LoggerKeys = ", ".join(Loggers.keys())