linkchecker/linkcheck/logger/XMLLogger.py
2004-07-26 11:18:12 +00:00

139 lines
5.1 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import xml.sax.saxutils
import linkcheck.logger.StandardLogger
import bk.i18n
# Extra replacements handed to xml.sax.saxutils.escape() when quoting
# attribute values. escape() already converts "&", "<" and ">" itself
# *before* it applies this table, so only the double quote has to be
# listed here. Do not add an "&" entry: it would be applied after the
# built-in pass and escape every ampersand a second time.
xmlattr_entities = {
    "\"": "&quot;",
}
def xmlquote (s):
    """Return s with the XML special characters "&", "<" and ">"
    replaced by their entity references."""
    escaped = xml.sax.saxutils.escape(s)
    return escaped
def xmlquoteattr (s):
    """Escape s for use as an XML attribute value that will be wrapped
    in double quotes (the surrounding quotes themselves are not added)."""
    escape = xml.sax.saxutils.escape
    # xmlattr_entities supplies the replacements applied on top of the
    # default escaping
    return escape(s, xmlattr_entities)
def xmlunquote (s):
    """Return s with the entity references "&lt;", "&gt;" and "&amp;"
    turned back into the characters they stand for."""
    plain = xml.sax.saxutils.unescape(s)
    return plain
def xmlunquoteattr (s):
    """Unquote an XML attribute value.

    Reverses the attribute quoting: "&quot;", "&lt;", "&gt;" and "&amp;"
    are turned back into the characters they stand for.
    """
    # unescape() expects an entity -> character table. Handing it the
    # character -> entity table used for quoting would re-escape the text
    # instead of decoding it, so the inverse mapping is spelled out here.
    # "&lt;", "&gt;" and "&amp;" are decoded by unescape() itself, with
    # "&amp;" handled last so that "&amp;quot;" yields a literal "&quot;".
    entity_chars = {"&quot;": "\""}
    return xml.sax.saxutils.unescape(s, entity_chars)
class XMLLogger (linkcheck.logger.StandardLogger.StandardLogger):
    """XML output mirroring the GML structure. Easy to parse with any XML
    tool.

    Each distinct URL is written as a <node> element, and each node whose
    parent URL is itself a known node gets exactly one <edge> element,
    all inside a <GraphXML>/<graph> wrapper. Everything is written to
    self.fd; the output methods do nothing once self.fd is None.

    NOTE(review): the methods use linkcheck.logger.Logger,
    linkcheck.Config and bk.strtime, none of which is imported explicitly
    in the visible part of this module -- presumably they are loaded as a
    side effect of other imports; confirm.
    """

    def __init__ (self, **args):
        """Initialize the base logger and the empty graph state."""
        super(XMLLogger, self).__init__(**args)
        # url -> url data object for every node written so far
        self.nodes = {}
        # id handed to the next new node
        self.nodeid = 0
        # ids of the nodes whose edge has already been written; a dict
        # used as a set so that no edge is emitted twice
        self.edges_written = {}

    def init (self):
        """Write the XML declaration, the optional intro comment and the
        opening <GraphXML>/<graph> tags, and remember the start time."""
        # NOTE(review): this calls Logger.init directly rather than the
        # StandardLogger implementation -- presumably on purpose, to skip
        # the parent's own header output; confirm.
        linkcheck.logger.Logger.Logger.init(self)
        if self.fd is None:
            return
        self.starttime = time.time()
        self.fd.write('<?xml version="1.0"?>\n')
        if self.has_field("intro"):
            self.fd.write("<!--\n")
            self.fd.write(" "+bk.i18n._("created by %s at %s\n") % \
                (linkcheck.Config.AppName,
                 bk.strtime.strtime(self.starttime)))
            self.fd.write(" "+bk.i18n._("Get the newest version at %s\n") % \
                linkcheck.Config.Url)
            self.fd.write(" "+bk.i18n._("Write comments and bugs to %s\n\n") % \
                linkcheck.Config.Email)
            self.fd.write("-->\n\n")
        self.fd.write('<GraphXML>\n<graph isDirected="true">\n')
        self.flush()

    def newUrl (self, urlData):
        """Write one node for urlData (if its URL has not been seen yet)
        and then every edge that can be resolved and is still unwritten.

        A new node is given the next free id, which is stored on urlData
        as its "id" attribute.
        """
        if self.fd is None:
            return
        node = urlData
        if node.url and node.url not in self.nodes:
            node.id = self.nodeid
            self.nodes[node.url] = node
            self.nodeid += 1
            self.fd.write(' <node name="%d" ' % node.id)
            self.fd.write(">\n")
            if self.has_field("realurl"):
                self.fd.write(" <label>%s</label>\n" % xmlquote(node.url))
            self.fd.write(" <data>\n")
            if node.dltime >= 0 and self.has_field("dltime"):
                self.fd.write(" <dltime>%f</dltime>\n" % node.dltime)
            if node.dlsize >= 0 and self.has_field("dlsize"):
                self.fd.write(" <dlsize>%d</dlsize>\n" % node.dlsize)
            if node.checktime and self.has_field("checktime"):
                self.fd.write(" <checktime>%f</checktime>\n" \
                    % node.checktime)
            if self.has_field("extern"):
                self.fd.write(" <extern>%d</extern>\n" % \
                    int(bool(node.extern)))
            self.fd.write(" </data>\n")
            self.fd.write(" </node>\n")
        self.writeEdges()

    def writeEdges (self):
        """Write the edges that can be found in the graph in a brute-force
        manner. Better would be a mapping of parent urls.

        Every known node is scanned on each call because a node's parent
        may only become known later. A node's edge is written the first
        time its parent is found among the known nodes and is remembered
        in self.edges_written, so repeated calls never emit it again.
        """
        if self.fd is None:
            return
        for node in self.nodes.values():
            if node.id in self.edges_written:
                # this node's edge is already in the output
                continue
            if node.parentName not in self.nodes:
                # parent not (yet) known; retry on a later call
                continue
            self.edges_written[node.id] = True
            self.fd.write(" <edge")
            self.fd.write(' source="%d"' % \
                self.nodes[node.parentName].id)
            self.fd.write(' target="%d"' % node.id)
            self.fd.write(">\n")
            if self.has_field("url"):
                self.fd.write(" <label>%s</label>\n" % \
                    xmlquote(node.urlName))
            self.fd.write(" <data>\n")
            if self.has_field("result"):
                self.fd.write(" <valid>%d</valid>\n" % \
                    int(bool(node.valid)))
            self.fd.write(" </data>\n")
            self.fd.write(" </edge>\n")
        self.flush()

    def endOfOutput (self, linknumber=-1):
        """Close the <graph>/<GraphXML> elements, write the optional
        outro comment and stop logging by setting self.fd to None.

        linknumber is accepted but not used by this logger.
        """
        if self.fd is None:
            return
        self.fd.write("</graph>\n</GraphXML>\n")
        if self.has_field("outro"):
            self.stoptime = time.time()
            duration = self.stoptime - self.starttime
            self.fd.write("<!-- ")
            self.fd.write(bk.i18n._("Stopped checking at %s (%s)\n")%\
                (bk.strtime.strtime(self.stoptime),
                 bk.strtime.strduration(duration)))
            self.fd.write("-->")
        self.flush()
        # NOTE(review): the file object is dropped here without being
        # closed -- presumably its owner closes it; confirm.
        self.fd = None