new custom XML logger

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2759 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2005-07-15 21:33:15 +00:00
parent 85038181b6
commit 3011eb5ee6
4 changed files with 57 additions and 72 deletions

View file

@ -39,6 +39,11 @@
Closes: SF bug #1238906
Changed: setup.py
* Renamed the old "xml" output logger to "gxml" and added a new
"xml" output logger which writes a custom XML format.
Type: feature
Changed: linkchecker, linkcheck/logger/*xml*.py
3.0 "The Jacket" (released 8.7.2005)
* Catch all check errors, not just the ones inside of URL checking.

View file

@ -92,7 +92,8 @@ import linkcheck.logger.dot
import linkcheck.logger.sql
import linkcheck.logger.csvlog
import linkcheck.logger.blacklist
import linkcheck.logger.xmllog
import linkcheck.logger.gxml
import linkcheck.logger.customxml
import linkcheck.logger.none
@ -105,7 +106,8 @@ Loggers = {
"sql": linkcheck.logger.sql.SQLLogger,
"csv": linkcheck.logger.csvlog.CSVLogger,
"blacklist": linkcheck.logger.blacklist.BlacklistLogger,
"xml": linkcheck.logger.xmllog.XMLLogger,
"gxml": linkcheck.logger.gxml.GraphXMLLogger,
"xml": linkcheck.logger.customxml.CustomXMLLogger,
"none": linkcheck.logger.none.NoneLogger,
}
# for easy printing: a comma separated logger list

View file

@ -138,6 +138,9 @@ class Configuration (dict):
self['xml'] = {
"filename": "linkchecker-out.xml",
}
self['gxml'] = {
"filename": "linkchecker-out.gxml",
}
self['dot'] = {
"filename": "linkchecker-out.dot",
"encoding": "ascii",

View file

@ -15,7 +15,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
An xml logger.
Base class for XML loggers.
"""
import time
@ -73,26 +73,25 @@ class XMLLogger (linkcheck.logger.Logger):
"""
super(XMLLogger, self).__init__(**args)
self.init_fileoutput(args)
self.nodes = {}
self.nodeid = 0
self.indent = u" "
self.level = 0
def comment (self, s, **args):
"""
Print HTML comment.
Write XML comment.
"""
self.write(u"<!-- ")
self.write(s, **args)
self.writeln(u" -->")
def start_output (self):
def xml_start_output (self, version="1.0", encoding="utf8"):
"""
Print start of checking info as xml comment.
Write start of checking info as xml comment.
"""
linkcheck.logger.Logger.start_output(self)
if self.fd is None:
return
# XXX wrap file with codecs
self.starttime = time.time()
self.writeln(u'<?xml version="1.0"?>')
self.writeln(u'<?xml version="%s" encoding="%s"?>' % \
(xmlquoteattr(version), xmlquoteattr(encoding)))
if self.has_part("intro"):
self.comment(_("created by %s at %s") %
(linkcheck.configuration.AppName,
@ -103,72 +102,48 @@ class XMLLogger (linkcheck.logger.Logger):
linkcheck.configuration.Email)
self.check_date()
self.writeln()
self.writeln(u'<GraphXML>')
self.writeln(u'<graph isDirected="true">')
self.flush()
def log_url (self, url_data):
def xml_end_output (self):
"""
Write one node and all possible edges.
Write end of checking info as xml comment.
"""
if self.fd is None:
return
node = url_data
if node.url and not self.nodes.has_key(node.url):
node.id = self.nodeid
self.nodes[node.url] = node
self.nodeid += 1
self.writeln(u' <node name="%d">' % node.id)
if self.has_part("realurl"):
self.writeln(u" <label>%s</label>" % xmlquote(node.url))
self.writeln(u" <data>")
if node.dltime >= 0 and self.has_part("dltime"):
self.writeln(u" <dltime>%f</dltime>" % node.dltime)
if node.dlsize >= 0 and self.has_part("dlsize"):
self.writeln(u" <dlsize>%d</dlsize>" % node.dlsize)
if node.checktime and self.has_part("checktime"):
self.writeln(u" <checktime>%f</checktime>" %
node.checktime)
if self.has_part("extern"):
self.writeln(u" <extern>%d</extern>" %
(node.extern[0] and 1 or 0))
self.writeln(u" </data>")
self.writeln(u" </node>")
self.write_edges()
def write_edges (self):
"""
Write all edges we can find in the graph in a brute-force
manner. Better would be a mapping of parent URLs.
"""
for node in self.nodes.values():
if self.nodes.has_key(node.parent_url):
self.write(u" <edge")
self.write(u' source="%d"' % self.nodes[node.parent_url].id)
self.writeln(u' target="%d">' % node.id)
if self.has_part("url"):
self.writeln(u" <label>%s</label>" % \
xmlquote(node.base_url or u""))
self.writeln(u" <data>")
if self.has_part("result"):
self.writeln(u" <valid>%d</valid>" % \
(node.valid and 1 or 0))
self.writeln(u" </data>")
self.writeln(u" </edge>")
self.flush()
def end_output (self):
"""
Finish graph output, and print end of checking info as xml comment.
"""
if self.fd is None:
return
self.writeln(u"</graph>")
self.writeln(u"</GraphXML>")
if self.has_part("outro"):
self.stoptime = time.time()
duration = self.stoptime - self.starttime
self.comment(_("Stopped checking at %s (%s)") % \
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.close_fileoutput()
def xml_starttag (self, name, attrs=None):
    """
    Open an XML element and descend one nesting level.

    The tag is preceded by self.indent repeated self.level times.
    The tag name is passed through xmlquote; for every item of the
    optional attrs mapping the key is passed through xmlquote and the
    value through xmlquoteattr. After the line is written, self.level
    is increased by one.
    """
    padding = self.indent * self.level
    self.write(padding)
    self.write(u"<%s" % xmlquote(name))
    # A missing or empty attribute mapping contributes nothing.
    for attr_name, attr_value in (attrs or {}).items():
        quoted = (xmlquote(attr_name), xmlquoteattr(attr_value))
        self.write(u' %s="%s"' % quoted)
    self.writeln(u">")
    self.level += 1
def xml_endtag (self, name):
    """
    Write XML end tag.

    The nesting level is decreased before anything is written, so the
    end tag is indented with self.indent repeated (self.level - 1)
    times, i.e. at the level that was current before the matching
    xml_starttag call increased it.

    The tag name is passed through xmlquote. An AssertionError is
    raised if the level would drop below zero, which means more end
    tags than start tags were requested.
    """
    # Leave the element's nesting level first; the end tag belongs to
    # the enclosing level, not to the element's content level.
    self.level -= 1
    assert self.level >= 0
    self.write(self.indent*self.level)
    # An end tag needs the leading slash; "<name>" would open a new
    # element instead of closing the current one.
    self.writeln(u"</%s>" % xmlquote(name))
def xml_tag (self, name, content, attrs=None):
    """
    Write XML tag with content.

    Writes the start tag, the content and the end tag on one line,
    preceded by self.indent repeated self.level times. The nesting
    level is not changed.

    The tag name and the content are passed through xmlquote; for
    every item of the optional attrs mapping the key is passed through
    xmlquote and the value through xmlquoteattr.
    """
    self.write(self.indent*self.level)
    self.write(u"<%s" % xmlquote(name))
    if attrs:
        # Use distinct loop names: rebinding "name" here would make the
        # closing tag below carry the last attribute's name instead of
        # the element name.
        for attr_name, attr_value in attrs.items():
            args = (xmlquote(attr_name), xmlquoteattr(attr_value))
            self.write(u' %s="%s"' % args)
    self.writeln(u">%s</%s>" % (xmlquote(content), xmlquote(name)))