mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
added DOT graph output logger
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2216 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
7fb9f12eeb
commit
719143c8a1
11 changed files with 176 additions and 11 deletions
11
ChangeLog
11
ChangeLog
|
|
@ -13,6 +13,17 @@
|
|||
Type: documentation
|
||||
Changed: linkchecker, doc/{en,de,fr}/linkchecker.1
|
||||
|
||||
* Always write the GML graph beginning, not just when "intro" field
|
||||
is defined.
|
||||
Type: bugfix
|
||||
Changed: linkcheck/logger/gml.py
|
||||
|
||||
* Added DOT graph format output logger.
|
||||
Type: feature
|
||||
Added: linkcheck/logger/dot.py
|
||||
Changed: linkcheck/logger/__init__.py, linkcheck/configuration.py,
|
||||
linkchecker
|
||||
|
||||
2.2 "Cube" (released 25.01.2005)
|
||||
|
||||
* CSV log format changes:
|
||||
|
|
|
|||
3
TODO
3
TODO
|
|
@ -1,5 +1,8 @@
|
|||
Possible improvements people could work on:
|
||||
|
||||
- [FEATURE] .dot output logger
|
||||
http://www.graphviz.org/cvs/doc/info/lang.html
|
||||
|
||||
- [OPTIMIZATION] Connection pooling. Right now we open for every link a
|
||||
new connection.
|
||||
Must be thread-safe, must handle timeouts and connection expiration
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ mit \fB-F\fP n
|
|||
.TP
|
||||
\fB-o\fP\fITyp\fP[\fB/\fP\fIEnkodierung\fP], \fB--output=\fP\fITyp\fP[\fB/\fP\fIEnkodierung\fP]
|
||||
Spezifiziere die Ausgabe als \fBtext\fP, \fBhtml\fP, \fBsql\fP,
|
||||
\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP.
|
||||
\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP.
|
||||
Standardausgabe ist \fBtext\fP. \fIEnkodierung\fP gibt die
|
||||
Ausgabekodierung an, die Standardkodierung ist \fBiso-8859-15\fP.
|
||||
Gültige Encodierungen sind unter
|
||||
|
|
@ -116,7 +116,7 @@ Der \fIDateiname\fP Teil wird beim Ausgabetyp \fBnone\fP ignoriert,
|
|||
ansonsten wird die Datei überschrieben, falls sie existiert.
|
||||
Sie können diese Option mehr als einmal verwenden. Gültige
|
||||
Ausgabetypen sind \fBtext\fP, \fBhtml\fP, \fBsql\fP,
|
||||
\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP.
|
||||
\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP.
|
||||
Standard ist keine Dateiausgabe. \fIEnkodierung\fP gibt die
|
||||
Ausgabekodierung an, die Standardkodierung ist \fBiso-8859-15\fP.
|
||||
Gültige Encodierungen sind unter
|
||||
|
|
@ -241,6 +241,11 @@ Gebe Vater-Kind Beziehungen zwischen verkn
|
|||
Sie sollten die Option \fB--verbose\fP benutzen, um einen vollständigen
|
||||
Graphen zu erhalten.
|
||||
.TP
|
||||
\fBdot\fP
|
||||
Gebe Vater-Kind Beziehungen zwischen verknüpften URLs als DOT Graphen aus.
|
||||
Sie sollten die Option \fB--verbose\fP benutzen, um einen vollständigen
|
||||
Graphen zu erhalten.
|
||||
.TP
|
||||
\fBxml\fP
|
||||
Gebe Prüfresultat als maschinenlesbare XML-Datei aus.
|
||||
.TP
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ This is only useful with \fB-F\fP.
|
|||
.TP
|
||||
\fB-o\fP\fItype\fP, \fB--output=\fP\fItype\fP[\fB/\fP\fIencoding\fP]
|
||||
Specify output type as \fBtext\fP, \fBhtml\fP, \fBsql\fP,
|
||||
\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP.
|
||||
\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP.
|
||||
Default type is \fBtext\fP. The various output types are documented
|
||||
below.
|
||||
\fIencoding\fP specifies the output encoding, the default is
|
||||
|
|
@ -124,7 +124,7 @@ The \fIfilename\fP part of the \fBnone\fP output type will be ignored,
|
|||
else if the file already exists, it will be overwritten.
|
||||
You can specify this option more than once. Valid file output types
|
||||
are \fBtext\fP, \fBhtml\fP, \fBsql\fP,
|
||||
\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP
|
||||
\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP.
|
||||
Default is no file output. The various output types are documented
|
||||
below. Note that you can suppress all console output
|
||||
with the option \fB-o none\fP.
|
||||
|
|
@ -240,6 +240,10 @@ Log check result in CSV format with one URL per line.
|
|||
Log parent-child relations between linked URLs as a GML graph.
|
||||
You should use the \fB--verbose\fP option to get a complete graph.
|
||||
.TP
|
||||
\fBdot\fP
|
||||
Log parent-child relations between linked URLs as a DOT graph.
|
||||
You should use the \fB--verbose\fP option to get a complete graph.
|
||||
.TP
|
||||
\fBxml\fP
|
||||
Log check result as machine-readable XML file.
|
||||
.TP
|
||||
|
|
|
|||
|
|
@ -97,7 +97,8 @@ Ex
|
|||
Cette option n'est utile qu'avec \fB\-F\fP.
|
||||
.TP
|
||||
\fB\-o\fP\fItype\fP, \fB\-\-output=\fP\fItype\fP[\fB/\fP\fIencodage\fP]
|
||||
Spécifier le type de sortie. Les types possibles sont \fBtext\fP, \fBhtml\fP, \fBsql\fP, \fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP. Le type par défaut est \fBtext\fP. Les différents types de sortie sont documentés ci\-dessous.
|
||||
Spécifier le type de sortie. Les types possibles sont \fBtext\fP,
|
||||
\fBhtml\fP, \fBsql\fP, \fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP. Le type par défaut est \fBtext\fP. Les différents types de sortie sont documentés ci\-dessous.
|
||||
\fIencodage\fP permet de spécifier l'encodage de sortie, la valeur par défaut étant \fBiso\-8859\-15\fP.
|
||||
Les encodages valides sont disponibles sur \fBhttp://docs.python.org/lib/node127.html\fP.
|
||||
.TP
|
||||
|
|
@ -108,7 +109,7 @@ Enregistrer la sortie dans un fichier \fBlinkchecker\-out.\fP\fItype\fP,
|
|||
Les encodages valides sont disponibles sur \fBhttp://docs.python.org/lib/node127.html\fP.
|
||||
La partie \fInom_fichier\fP du type de sortie \fBnone\fP est ignorée, sinon, si le fichier existe déjà, il sera écrasé.
|
||||
Vous pouvez spécifier l'option plusieurs fois. Les types de sortie valides pour les fichiers sont \fBtext\fP, \fBhtml\fP, \fBsql\fP,
|
||||
\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP.
|
||||
\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP.
|
||||
Par défaut, il n'y a pas de fichier de sortie. Les différents types de sortie sont documentés ci\-dessous. Il faut noter que vous pouvez supprimer toutes les sorties console avec l'option \fB\-o none\fP.
|
||||
.TP
|
||||
\fB\-\-no\-status\fP
|
||||
|
|
@ -194,6 +195,9 @@ Journaliser le r
|
|||
\fBgml\fP
|
||||
Journaliser les relations fils/père entre les URL liées dans un graphe GML. Vous devez utiliser l'option \fB\-\-verbose\fP pour avoir un graphe complet.
|
||||
.TP
|
||||
\fBdot\fP
|
||||
Journaliser les relations fils/père entre les URL liées dans un graphe DOT. Vous devez utiliser l'option \fB\-\-verbose\fP pour avoir un graphe complet.
|
||||
.TP
|
||||
\fBxml\fP
|
||||
Journaliser le résultat de la vérification dans un fichier au format XML.
|
||||
.TP
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ def get_link_pat (arg, strict=False):
|
|||
import linkcheck.logger.text
|
||||
import linkcheck.logger.html
|
||||
import linkcheck.logger.gml
|
||||
import linkcheck.logger.dot
|
||||
import linkcheck.logger.sql
|
||||
import linkcheck.logger.csvlog
|
||||
import linkcheck.logger.blacklist
|
||||
|
|
@ -94,6 +95,7 @@ Loggers = {
|
|||
"text": linkcheck.logger.text.TextLogger,
|
||||
"html": linkcheck.logger.html.HtmlLogger,
|
||||
"gml": linkcheck.logger.gml.GMLLogger,
|
||||
"dot": linkcheck.logger.dot.DOTLogger,
|
||||
"sql": linkcheck.logger.sql.SQLLogger,
|
||||
"csv": linkcheck.logger.csvlog.CSVLogger,
|
||||
"blacklist": linkcheck.logger.blacklist.BlacklistLogger,
|
||||
|
|
|
|||
|
|
@ -130,6 +130,9 @@ class Configuration (dict):
|
|||
self['xml'] = {
|
||||
"filename": "linkchecker-out.xml",
|
||||
}
|
||||
self['dot'] = {
|
||||
"filename": "linkchecker-out.dot",
|
||||
}
|
||||
self['none'] = {}
|
||||
self['logger'] = self.logger_new('text')
|
||||
self["warningregex"] = None
|
||||
|
|
|
|||
131
linkcheck/logger/dot.py
Normal file
131
linkcheck/logger/dot.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2005 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
A DOT graph format logger. The specification has been taken from
|
||||
http://www.graphviz.org/cvs/doc/info/lang.html.
|
||||
"""
|
||||
|
||||
import time
|
||||
import os
|
||||
|
||||
import linkcheck.configuration
|
||||
|
||||
|
||||
class DOTLogger (linkcheck.logger.Logger):
    """
    Generates .dot sitemap graphs. Use graphviz to see the sitemap graph.

    Every checked URL becomes a numbered node; an edge is written from
    the node of a URL's parent to the node of the URL itself.
    """

    def __init__ (self, **args):
        """
        Initialize graph node list and internal id counter.

        @param args: logger configuration, passed on unchanged to the
            base class constructor and to init_fileoutput()
        """
        # Must name this class: GMLLogger is not defined in this module,
        # so super(GMLLogger, self) raised a NameError on construction.
        super(DOTLogger, self).__init__(**args)
        self.init_fileoutput(args)
        # maps node.url -> url data object (which carries the node id)
        self.nodes = {}
        # id handed out to the next new node
        self.nodeid = 0

    def start_output (self):
        """
        Print start of checking info as DOT comment and open the graph.
        """
        super(DOTLogger, self).start_output()
        if self.fd is None:
            return
        self.starttime = time.time()
        if self.has_field("intro"):
            self.comment(_("created by %s at %s") %
                         (linkcheck.configuration.AppName,
                          linkcheck.strformat.strtime(self.starttime)))
            self.comment(_("Get the newest version at %(url)s") %
                         {'url': linkcheck.configuration.Url})
            self.comment(_("Write comments and bugs to %(email)s") %
                         {'email': linkcheck.configuration.Email})
            self.check_date()
            self.writeln()
        # The graph has to be opened even without the "intro" field, and
        # it has to be a digraph: write_edges() emits "->" edges, which
        # the DOT grammar only accepts in directed graphs.
        self.writeln(u"digraph {")
        self.flush()

    def comment (self, s, **args):
        """
        Print DOT comment.

        @param s: comment text, written after a "// " prefix
        @param args: extra keyword arguments passed on to writeln()
        """
        self.write(u"// ")
        self.writeln(s=s, **args)

    def new_url (self, url_data):
        """
        Write one node and all possible edges.

        @param url_data: the checked URL; it gets an "id" attribute
            assigned when it is seen for the first time
        """
        if self.fd is None:
            return
        node = url_data
        if node.url and node.url not in self.nodes:
            node.id = self.nodeid
            self.nodes[node.url] = node
            self.nodeid += 1
            self.writeln(u" %d [" % node.id)
            if self.has_field("realurl"):
                self.writeln(u' label="%s",' % dotquote(node.url))
            if node.dltime >= 0 and self.has_field("dltime"):
                self.writeln(u" dltime=%d," % node.dltime)
            if node.dlsize >= 0 and self.has_field("dlsize"):
                self.writeln(u" dlsize=%d," % node.dlsize)
            if node.checktime and self.has_field("checktime"):
                self.writeln(u" checktime=%d," % node.checktime)
            if self.has_field("extern"):
                self.writeln(u" extern=%d," % (node.extern and 1 or 0))
            self.writeln(u" ];")
        self.write_edges()

    def write_edges (self):
        """
        Write all edges we can find in the graph in a brute-force
        manner. Better would be a mapping of parent urls.
        """
        # NOTE(review): every call walks all known nodes, so an edge
        # whose parent is already known is written again on each call.
        for node in self.nodes.values():
            if node.parent_url in self.nodes:
                source = self.nodes[node.parent_url].id
                target = node.id
                self.writeln(u" %d -> %d [" % (source, target))
                self.writeln(u' label="%s",' % dotquote(node.base_url))
                if self.has_field("result"):
                    self.writeln(u" valid=%d," % (node.valid and 1 or 0))
                self.writeln(u" ];")
        self.flush()

    def end_output (self, linknumber=-1):
        """
        Close the graph and print end of checking info as DOT comment.

        @param linknumber: not used by this logger
        """
        if self.fd is None:
            return
        self.writeln(u"}")
        if self.has_field("outro"):
            self.stoptime = time.time()
            duration = self.stoptime - self.starttime
            self.comment(_("Stopped checking at %s (%s)") %
                         (linkcheck.strformat.strtime(self.stoptime),
                          linkcheck.strformat.strduration(duration)))
        self.flush()
        if self.close_fd:
            self.fd.close()
        # mark the logger as finished so later calls become no-ops
        self.fd = None
|
||||
|
||||
|
||||
def dotquote (s):
    """
    Escape a string for use inside a double-quoted DOT string.

    Backslashes are doubled before double quotes are escaped. Otherwise
    a value ending in a backslash would turn the closing quote into an
    escaped quote and leave the DOT string unterminated.

    @param s: the raw string (for example an URL)
    @return: the string with backslashes and double quotes escaped
    """
    return s.replace('\\', '\\\\').replace('"', '\\"')
|
||||
|
|
@ -27,7 +27,7 @@ import linkcheck.configuration
|
|||
class GMLLogger (linkcheck.logger.Logger):
|
||||
"""
|
||||
GML means Graph Modeling Language. Use a GML tool to see
|
||||
your sitemap graph.
|
||||
the sitemap graph.
|
||||
"""
|
||||
|
||||
def __init__ (self, **args):
|
||||
|
|
@ -57,9 +57,9 @@ class GMLLogger (linkcheck.logger.Logger):
|
|||
{'email': linkcheck.configuration.Email})
|
||||
self.check_date()
|
||||
self.writeln()
|
||||
self.writeln(u"graph [")
|
||||
self.writeln(u" directed 1")
|
||||
self.flush()
|
||||
self.writeln(u"graph [")
|
||||
self.writeln(u" directed 1")
|
||||
self.flush()
|
||||
|
||||
def comment (self, s, **args):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -118,6 +118,8 @@ html Log URLs in keyword: argument fashion, formatted as HTML.
|
|||
csv Log check result in CSV format with one URL per line.
|
||||
gml Log parent-child relations between linked URLs as a GML graph.
|
||||
You should use the --verbose option to get a complete graph.
|
||||
dot Log parent-child relations between linked URLs as a DOT graph.
|
||||
You should use the --verbose option to get a complete graph.
|
||||
xml Log check result as machine-readable XML file.
|
||||
sql Log check result as SQL script with INSERT commands. An example
|
||||
script to create the initial SQL table is included as create.sql.
|
||||
|
|
|
|||
2
test.py
2
test.py
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python2.3
|
||||
#!/usr/bin/env python2.4
|
||||
#
|
||||
# SchoolTool - common information systems platform for school administration
|
||||
# Copyright (c) 2003 Shuttleworth Foundation
|
||||
|
|
|
|||
Loading…
Reference in a new issue