mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-05 07:20:58 +00:00
XML output
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@194 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
46ed3c6cb3
commit
a81e82f045
6 changed files with 126 additions and 40 deletions
2
Makefile
2
Makefile
|
|
@ -4,7 +4,7 @@ VERSION=$(shell python setup.py --version)
|
|||
PACKAGE = linkchecker
|
||||
NAME = $(shell python setup.py --name)
|
||||
HOST=fsinfo.cs.uni-sb.de
|
||||
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v
|
||||
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v
|
||||
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
|
||||
SOURCES = \
|
||||
linkcheck/Config.py \
|
||||
|
|
|
|||
6
README
6
README
|
|
@ -26,14 +26,16 @@ Installing, Requirements, Running
|
|||
Read the file INSTALL.
|
||||
|
||||
|
||||
License
|
||||
--------
|
||||
License and Credits
|
||||
-------------------
|
||||
LinkChecker is licensed under the GNU General Public License.
|
||||
Credits go to Guido van Rossum for making Python. His hovercraft is
|
||||
full of eels!
|
||||
As this program is directly derived from my Java link checker, additional
|
||||
credits go to Robert Forsman (the author of JCheckLinks) and his
|
||||
robots.txt parsing algorithm.
|
||||
Nicolas Chauvat <Nicolas.Chauvat@logilab.fr> supplied a patch for
|
||||
an XML output logger.
|
||||
I want to thank everybody who gave me feedback, bug reports and
|
||||
suggestions.
|
||||
|
||||
|
|
|
|||
4
debian/changelog
vendored
4
debian/changelog
vendored
|
|
@ -2,8 +2,10 @@ linkchecker (1.2.8) unstable; urgency=low
|
|||
|
||||
* INSTALL: more documentation for the CGI scripts
|
||||
* Makefile: better cleaning (clean, cleandeb, distclean)
|
||||
* XML output (idea and patch from Nicolas Chauvat
|
||||
<Nicolas.Chauvat@logilab.fr>)
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Fri, 10 Nov 2000 11:54:25 +0100
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Wed, 15 Nov 2000 23:27:37 +0100
|
||||
|
||||
linkchecker (1.2.7) unstable; urgency=low
|
||||
|
||||
|
|
|
|||
4
debian/dirs
vendored
4
debian/dirs
vendored
|
|
@ -1,5 +1 @@
|
|||
usr/lib/python1.5/site-packages/DNS
|
||||
usr/lib/python1.5/site-packages/linkcheck
|
||||
usr/bin
|
||||
etc
|
||||
usr/share/doc/linkchecker
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ Loggers = {
|
|||
"sql": Logging.SQLLogger,
|
||||
"csv": Logging.CSVLogger,
|
||||
"blacklist": Logging.BlacklistLogger,
|
||||
"xml": Logging.XMLLogger,
|
||||
}
|
||||
# for easy printing: a comma separated logger list
|
||||
LoggerKeys = reduce(lambda x, y: x+", "+y, Loggers.keys())
|
||||
|
|
@ -143,6 +144,9 @@ class Configuration(UserDict.UserDict):
|
|||
self.data['blacklist'] = {
|
||||
"filename": "~/.blacklist",
|
||||
}
|
||||
self.data['xml'] = {
|
||||
"filename": "linkchecker-out.xml",
|
||||
}
|
||||
# default values
|
||||
self.data['log'] = self.newLogger('text')
|
||||
self.data["quiet"] = 0
|
||||
|
|
@ -470,4 +474,3 @@ class Configuration(UserDict.UserDict):
|
|||
except ConfigParser.Error: pass
|
||||
try: self.data["allowdeny"] = cfgparser.getboolean(section, "allowdeny")
|
||||
except ConfigParser.Error: pass
|
||||
|
||||
|
|
|
|||
|
|
@ -401,7 +401,8 @@ class GMLLogger(StandardLogger):
|
|||
"""
|
||||
def __init__(self, **args):
|
||||
apply(StandardLogger.__init__, (self,), args)
|
||||
self.nodes = []
|
||||
self.nodes = {}
|
||||
self.nodeid = 0
|
||||
|
||||
def init(self):
|
||||
self.starttime = time.time()
|
||||
|
|
@ -413,40 +414,42 @@ class GMLLogger(StandardLogger):
|
|||
self.fd.write("graph [\n directed 1\n")
|
||||
self.fd.flush()
|
||||
|
||||
def newUrl(self, urlData):
|
||||
self.nodes.append(urlData)
|
||||
|
||||
def endOfOutput(self, linknumber=-1):
|
||||
writtenNodes = {}
|
||||
# write nodes
|
||||
nodeid = 1
|
||||
for node in self.nodes:
|
||||
if node.url and not writtenNodes.has_key(node.url):
|
||||
self.fd.write(" node [\n")
|
||||
self.fd.write(" id %d\n" % nodeid)
|
||||
self.fd.write(' label "%s"\n' % node.url)
|
||||
if node.downloadtime:
|
||||
self.fd.write(" dltime %d\n" % node.downloadtime)
|
||||
if node.checktime:
|
||||
self.fd.write(" checktime %d\n" % node.checktime)
|
||||
self.fd.write(" extern ")
|
||||
if node.extern: self.fd.write("1")
|
||||
else: self.fd.write("0")
|
||||
self.fd.write("\n ]\n")
|
||||
writtenNodes[node.url] = nodeid
|
||||
nodeid = nodeid + 1
|
||||
# write edges
|
||||
for node in self.nodes:
|
||||
if node.url and node.parentName:
|
||||
def newUrl(self, urlData):
|
||||
"""write one node and all possible edges"""
|
||||
node = urlData
|
||||
if node.url and not self.nodes.has_key(node.url):
|
||||
node.id = self.nodeid
|
||||
self.nodes[node.url] = node
|
||||
self.nodeid = self.nodeid + 1
|
||||
self.fd.write(" node [\n")
|
||||
self.fd.write(" id %d\n" % node.id)
|
||||
self.fd.write(' label "%s"\n' % node.url)
|
||||
if node.downloadtime:
|
||||
self.fd.write(" dltime %d\n" % node.downloadtime)
|
||||
if node.checktime:
|
||||
self.fd.write(" checktime %d\n" % node.checktime)
|
||||
self.fd.write(" extern "+(node.extern and "1" or "0"))
|
||||
self.fd.write("\n ]\n")
|
||||
self.writeEdges()
|
||||
|
||||
|
||||
def writeEdges(self):
|
||||
"""write all edges we can find in the graph in a brute-force
|
||||
manner. Better would be a mapping of parent urls.
|
||||
"""
|
||||
for node in self.nodes.values():
|
||||
if self.nodes.has_key(node.parentName):
|
||||
self.fd.write(" edge [\n")
|
||||
self.fd.write(' label "%s"\n' % node.urlName)
|
||||
self.fd.write(" source %d\n"%writtenNodes[node.parentName])
|
||||
self.fd.write(" target %d\n" % writtenNodes[node.url])
|
||||
self.fd.write(" valid ")
|
||||
if node.valid: self.fd.write("1")
|
||||
else: self.fd.write("0")
|
||||
self.fd.write(" source %d\n" % self.nodes[node.parentName])
|
||||
self.fd.write(" target %d\n" % node.id)
|
||||
self.fd.write(" valid "+(node.valid and "1" or "0"))
|
||||
self.fd.write("\n ]\n")
|
||||
# end of output
|
||||
self.fd.flush()
|
||||
|
||||
|
||||
def endOfOutput(self, linknumber=-1):
|
||||
self.fd.write("]\n")
|
||||
self.stoptime = time.time()
|
||||
duration = self.stoptime - self.starttime
|
||||
|
|
@ -464,6 +467,86 @@ class GMLLogger(StandardLogger):
|
|||
self.fd = None
|
||||
|
||||
|
||||
class XMLLogger(StandardLogger):
    """XML output mirroring the GML structure. Easy to parse with any XML
    tool.

    The graph is written incrementally: every new URL becomes a <node>
    element, and an <edge> element is written as soon as both a node and
    its parent node are known. Each edge is written exactly once.
    """
    def __init__(self, **args):
        apply(StandardLogger.__init__, (self,), args)
        # maps url -> url data object of every node written so far
        self.nodes = {}
        # id that will be given to the next new node
        self.nodeid = 0
        # ids of the nodes whose incoming edge has already been written
        self.writtenEdges = {}

    def init(self):
        """write the XML declaration, an informational comment and the
        opening tags of the graph"""
        self.starttime = time.time()
        self.fd.write("<?xml version='1.0'?>\n")
        self.fd.write("<!--\n")
        self.fd.write(" "+_("created by %s at %s\n") % \
                      (Config.AppName, _strtime(self.starttime)))
        self.fd.write(" "+_("Get the newest version at %s\n") % Config.Url)
        self.fd.write(" "+_("Write comments and bugs to %s\n\n") % \
                      Config.Email)
        self.fd.write("-->\n\n")
        self.fd.write("<GraphXML>\n<graph isDirected='true'>\n")
        self.fd.flush()

    def _quote(self, text):
        """return text as XML character data, with &, < and > replaced
        by their entities. Non-string values are converted with %s first."""
        # local import: the rest of the module does not need xml.sax
        from xml.sax.saxutils import escape
        return escape("%s" % (text,))

    def newUrl(self, urlData):
        """write one node and all possible edges"""
        node = urlData
        if node.url and not self.nodes.has_key(node.url):
            node.id = self.nodeid
            self.nodes[node.url] = node
            self.nodeid = self.nodeid + 1
            self.fd.write(" <node name='%d' " % node.id)
            self.fd.write(">\n")
            # urls regularly contain '&', so they must be escaped
            self.fd.write(" <label>%s</label>\n" % self._quote(node.url))
            self.fd.write(" <data>\n")
            if node.downloadtime:
                self.fd.write(" <dltime>%d</dltime>\n" \
                              % node.downloadtime)
            if node.checktime:
                self.fd.write(" <checktime>%d</checktime>\n" \
                              % node.checktime)
            # normalize to 0/1 so a None flag does not break %d
            self.fd.write(" <extern>%d</extern>\n" % \
                          (node.extern and 1 or 0))
            self.fd.write(" </data>\n")
            self.fd.write(" </node>\n")
        self.writeEdges()

    def writeEdges(self):
        """write all edges we can find in the graph in a brute-force
        manner. Better would be a mapping of parent urls.

        An edge is written for every known node whose parent url is
        also a known node. Edges already written by a previous call are
        skipped, so repeated calls never duplicate an edge.
        """
        for node in self.nodes.values():
            if self.writtenEdges.has_key(node.id):
                continue
            if self.nodes.has_key(node.parentName):
                parent = self.nodes[node.parentName]
                self.fd.write(" <edge")
                # source is the id of the parent node, not the node itself
                self.fd.write(" source='%d'" % parent.id)
                self.fd.write(" target='%d'" % node.id)
                self.fd.write(">\n")
                self.fd.write(" <label>'%s'</label>\n" % \
                              self._quote(node.urlName))
                self.fd.write(" <data>\n")
                # validity belongs to the checked url, not to the logger
                self.fd.write(" <valid>%d</valid>\n" % \
                              (node.valid and 1 or 0))
                self.fd.write(" </data>\n")
                self.fd.write(" </edge>\n")
                self.writtenEdges[node.id] = 1
        self.fd.flush()

    def endOfOutput(self, linknumber=-1):
        """close the graph and append a comment with the stop time and
        the total duration. linknumber is accepted for interface
        compatibility and not used here."""
        self.fd.write("</graph>\n</GraphXML>\n")
        self.stoptime = time.time()
        duration = self.stoptime - self.starttime
        name = _("seconds")
        self.fd.write("<!-- ")
        self.fd.write(_("Stopped checking at %s") % _strtime(self.stoptime))
        # scale the duration to the largest fitting unit
        if duration > 60:
            duration = duration / 60
            name = _("minutes")
        if duration > 60:
            duration = duration / 60
            name = _("hours")
        self.fd.write(" (%.3f %s)\n" % (duration, name))
        self.fd.write("-->")
        self.fd.flush()
        # drop the reference; the stream itself is not closed here
        self.fd = None
|
||||
|
||||
|
||||
|
||||
class SQLLogger(StandardLogger):
|
||||
""" SQL output for PostgreSQL, not tested"""
|
||||
def __init__(self, **args):
|
||||
|
|
|
|||
Loading…
Reference in a new issue