diff --git a/ChangeLog b/ChangeLog index e332745f..5813d2d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,17 @@ Type: documentation Changed: linkchecker, doc/{en,de,fr}/linkchecker.1 + * Always write the GML graph beginning, not just when "intro" field + is defined. + Type: bugfix + Changed: linkcheck/logger/gml.py + + * Added DOT graph format output logger. + Type: feature + Added: linkcheck/logger/dot.py + Changed: linkcheck/logger/__init__.py, linkcheck/configuration.py, + linkchecker + 2.2 "Cube" (released 25.01.2005) * CSV log format changes: diff --git a/TODO b/TODO index c01c3c0c..413bb572 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,8 @@ Possible improvements people could work on: +- [FEATURE] .dot output logger + http://www.graphviz.org/cvs/doc/info/lang.html + - [OPTIMIZATION] Connection pooling. Right now we open for every link a new connection. Must be thread-safe, must handle timeouts and connection expiration diff --git a/doc/de/linkchecker.1 b/doc/de/linkchecker.1 index 5d0137e6..504075e0 100644 --- a/doc/de/linkchecker.1 +++ b/doc/de/linkchecker.1 @@ -102,7 +102,7 @@ mit \fB-F\fP n .TP \fB-o\fP\fITyp\fP[\fB/\fP\fIEnkodierung\fP], \fB--output=\fP\fITyp\fP[\fB/\fP\fIEnkodierung\fP] Spezifiziere die Ausgabe als \fBtext\fP, \fBhtml\fP, \fBsql\fP, -\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP. +\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP. Standardausgabe ist \fBtext\fP. \fIEnkodierung\fP gibt die Ausgabekodierung an, die Standardkodierung ist \fBiso-8859-15\fP. Gültige Encodierungen sind unter @@ -116,7 +116,7 @@ Der \fIDateiname\fP Teil wird beim Ausgabetyp \fBnone\fP ignoriert, ansonsten wird die Datei überschreiben falls sie existiert. Sie können diese Option mehr als einmal verwenden. Gültige Ausgabetypen sind \fBtext\fP, \fBhtml\fP, \fBsql\fP, -\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP. +\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP oder \fBblacklist\fP. 
Standard ist keine Dateiausgabe. \fIEnkodierung\fP gibt die Ausgabekodierung an, die Standardkodierung ist \fBiso-8859-15\fP. Gültige Encodierungen sind unter @@ -241,6 +241,11 @@ Gebe Vater-Kind Beziehungen zwischen verkn Sie sollten die Option \fB--verbose\fP benutzen, um einen vollständigen Graphen zu erhalten. .TP +\fBdot\fP +Gebe Vater-Kind Beziehungen zwischen verknüpften URLs als DOT Graphen aus. +Sie sollten die Option \fB--verbose\fP benutzen, um einen vollständigen +Graphen zu erhalten. +.TP \fBxml\fP Gebe Prüfresultat als maschinenlesbare XML-Datei aus. .TP diff --git a/doc/en/linkchecker.1 b/doc/en/linkchecker.1 index 7c349eaa..49f62d21 100644 --- a/doc/en/linkchecker.1 +++ b/doc/en/linkchecker.1 @@ -104,7 +104,7 @@ This is only useful with \fB-F\fP. .TP \fB-o\fP\fItype\fP, \fB--output=\fP\fItype\fP[\fB/\fP\fIencoding\fP] Specify output type as \fBtext\fP, \fBhtml\fP, \fBsql\fP, -\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP. +\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP. Default type is \fBtext\fP. The various output types are documented below. \fIencoding\fP specifies the output encoding, the default is @@ -124,7 +124,7 @@ The \fIfilename\fP part of the \fBnone\fP output type will be ignored, else if the file already exists, it will be overwritten. You can specify this option more than once. Valid file output types are \fBtext\fP, \fBhtml\fP, \fBsql\fP, -\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP +\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP. Default is no file output. The various output types are documented below. Note that you can suppress all console output with the option \fB-o none\fP. @@ -240,6 +240,10 @@ Log check result in CSV format with one URL per line. Log parent-child relations between linked URLs as a GML graph. You should use the \fB--verbose\fP option to get a complete graph. 
.TP +\fBdot\fP +Log parent-child relations between linked URLs as a DOT graph. +You should use the \fB--verbose\fP option to get a complete graph. +.TP \fBxml\fP Log check result as machine-readable XML file. .TP diff --git a/doc/fr/linkchecker.1 b/doc/fr/linkchecker.1 index 489977ef..859377ab 100644 --- a/doc/fr/linkchecker.1 +++ b/doc/fr/linkchecker.1 @@ -97,7 +97,8 @@ Ex Cette option n'est utile qu'avec \fB\-F\fP. .TP \fB\-o\fP\fItype\fP, \fB\-\-output=\fP\fItype\fP[\fB/\fP\fIencodage\fP] -Spécifier le type de sortie. Les types possibles sont \fBtext\fP, \fBhtml\fP, \fBsql\fP, \fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP. Le type par défaut est \fBtext\fP. Les différents types de sortie sont documentés ci\-dessous. +Spécifier le type de sortie. Les types possibles sont \fBtext\fP, +\fBhtml\fP, \fBsql\fP, \fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP. Le type par défaut est \fBtext\fP. Les différents types de sortie sont documentés ci\-dessous. \fIencodage\fP permet de spécifier l'encodage de sortie, la valeur par défaut étant \fBiso\-8859\-15\fP. Les encodages valides sont disponibles sur \fBhttp://docs.python.org/lib/node127.html\fP. .TP @@ -108,7 +109,7 @@ Enregistrer la sortie dans un fichier \fBlinkchecker\-out.\fP\fItype\fP, Les encodages valides sont disponibles sur \fBhttp://docs.python.org/lib/node127.html\fP. La partie \fInom_fichier\fP du type de sortie \fBnone\fP est ignorée, sinon, si le fichier existe déjà, il sera écrasé. Vous pouvez spécifier l'option plusieurs fois. Les types de sortie valides pour les fichiers sont \fBtext\fP, \fBhtml\fP, \fBsql\fP, -\fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP. +\fBcsv\fP, \fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP ou \fBblacklist\fP. Par défaut, il n'y a pas de fichier de sortie. Les différents types de sortie sont documentés ci\-dessous. Il faut noter que vous pouvez supprimer toutes les sorties console avec l'option \fB\-o none\fP. 
.TP \fB\-\-no\-status\fP @@ -194,6 +195,9 @@ Journaliser le r \fBgml\fP Journaliser les relations fils/père entre les URL liées dans un graphe GML. Vous devez utiliser l'option \fB\-\-verbose\fP pour avoir un graphe complet. .TP +\fBdot\fP +Journaliser les relations fils/père entre les URL liées dans un graphe DOT. Vous devez utiliser l'option \fB\-\-verbose\fP pour avoir un graphe complet. +.TP \fBxml\fP Journaliser le résultat de la vérification dans un fichier au format XML. .TP diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index a347e6a2..ff4c2019 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -82,6 +82,7 @@ def get_link_pat (arg, strict=False): import linkcheck.logger.text import linkcheck.logger.html import linkcheck.logger.gml +import linkcheck.logger.dot import linkcheck.logger.sql import linkcheck.logger.csvlog import linkcheck.logger.blacklist @@ -94,6 +95,7 @@ Loggers = { "text": linkcheck.logger.text.TextLogger, "html": linkcheck.logger.html.HtmlLogger, "gml": linkcheck.logger.gml.GMLLogger, + "dot": linkcheck.logger.dot.DOTLogger, "sql": linkcheck.logger.sql.SQLLogger, "csv": linkcheck.logger.csvlog.CSVLogger, "blacklist": linkcheck.logger.blacklist.BlacklistLogger, diff --git a/linkcheck/configuration.py b/linkcheck/configuration.py index 8b2f1f87..385635e2 100644 --- a/linkcheck/configuration.py +++ b/linkcheck/configuration.py @@ -130,6 +130,9 @@ class Configuration (dict): self['xml'] = { "filename": "linkchecker-out.xml", } + self['dot'] = { + "filename": "linkchecker-out.dot", + } self['none'] = {} self['logger'] = self.logger_new('text') self["warningregex"] = None diff --git a/linkcheck/logger/dot.py b/linkcheck/logger/dot.py new file mode 100644 index 00000000..1c097460 --- /dev/null +++ b/linkcheck/logger/dot.py @@ -0,0 +1,131 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2005 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU 
General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +""" +A DOT graph format logger. The specification has been taken from +http://www.graphviz.org/cvs/doc/info/lang.html. +""" + +import time +import os + +import linkcheck.configuration + + +class DOTLogger (linkcheck.logger.Logger): + """ + Generates .dot sitemap graphs. Use graphviz to see the sitemap graph. + """ + + def __init__ (self, **args): + """ + Initialize graph node list and internal id counter. + """ + super(DOTLogger, self).__init__(**args) + self.init_fileoutput(args) + self.nodes = {} + self.nodeid = 0 + + def start_output (self): + """ + Print start of checking info as DOT comments and open the digraph. + """ + super(DOTLogger, self).start_output() + if self.fd is None: + return + self.starttime = time.time() + if self.has_field("intro"): + self.comment(_("created by %s at %s") % \ + (linkcheck.configuration.AppName, + linkcheck.strformat.strtime(self.starttime))) + self.comment(_("Get the newest version at %(url)s") % \ + {'url': linkcheck.configuration.Url}) + self.comment(_("Write comments and bugs to %(email)s") % \ + {'email': linkcheck.configuration.Email}) + self.check_date() + self.writeln() + self.writeln(u"digraph {") + self.flush() + + def comment (self, s, **args): + """ + Print DOT comment. + """ + self.write(u"// ") + self.writeln(s=s, **args) + + def new_url (self, url_data): + """ + Write one node and all possible edges. 
+ """ + if self.fd is None: + return + node = url_data + if node.url and node.url not in self.nodes: + node.id = self.nodeid + self.nodes[node.url] = node + self.nodeid += 1 + self.writeln(u" %d [" % node.id) + if self.has_field("realurl"): + self.writeln(u' label="%s",' % dotquote(node.url)) + if node.dltime >= 0 and self.has_field("dltime"): + self.writeln(u" dltime=%d," % node.dltime) + if node.dlsize >= 0 and self.has_field("dlsize"): + self.writeln(u" dlsize=%d," % node.dlsize) + if node.checktime and self.has_field("checktime"): + self.writeln(u" checktime=%d," % node.checktime) + if self.has_field("extern"): + self.writeln(u" extern=%d," % (node.extern and 1 or 0)) + self.writeln(u" ];") + self.write_edges() + + def write_edges (self): + """ + Write all edges we can find in the graph in a brute-force + manner. Better would be a mapping of parent urls. + """ + for node in self.nodes.values(): + if node.parent_url in self.nodes: + source = self.nodes[node.parent_url].id + target = node.id + self.writeln(u" %d -> %d [" % (source, target)) + self.writeln(u' label="%s",' % dotquote(node.base_url)) + if self.has_field("result"): + self.writeln(u" valid=%d," % (node.valid and 1 or 0)) + self.writeln(u" ];") + self.flush() + + def end_output (self, linknumber=-1): + """ + Print end of checking info as DOT comment. 
+ """ + if self.fd is None: + return + self.writeln(u"}") + if self.has_field("outro"): + self.stoptime = time.time() + duration = self.stoptime - self.starttime + self.comment(_("Stopped checking at %s (%s)")%\ + (linkcheck.strformat.strtime(self.stoptime), + linkcheck.strformat.strduration(duration))) + self.flush() + if self.close_fd: + self.fd.close() + self.fd = None + + +def dotquote (s): + return s.replace('\\', '\\\\').replace('"', '\\"') diff --git a/linkcheck/logger/gml.py b/linkcheck/logger/gml.py index 8290fd4b..2e078d13 100644 --- a/linkcheck/logger/gml.py +++ b/linkcheck/logger/gml.py @@ -27,7 +27,7 @@ import linkcheck.configuration class GMLLogger (linkcheck.logger.Logger): """ GML means Graph Modeling Language. Use a GML tool to see - your sitemap graph. + the sitemap graph. """ def __init__ (self, **args): @@ -57,9 +57,9 @@ class GMLLogger (linkcheck.logger.Logger): {'email': linkcheck.configuration.Email}) self.check_date() self.writeln() - self.writeln(u"graph [") - self.writeln(u" directed 1") - self.flush() + self.writeln(u"graph [") + self.writeln(u" directed 1") + self.flush() def comment (self, s, **args): """ diff --git a/linkchecker b/linkchecker index 82797989..9ec15763 100755 --- a/linkchecker +++ b/linkchecker @@ -118,6 +118,8 @@ html Log URLs in keyword: argument fashion, formatted as HTML. csv Log check result in CSV format with one URL per line. gml Log parent-child relations between linked URLs as a GML graph. You should use the --verbose option to get a complete graph. +dot Log parent-child relations between linked URLs as a DOT graph. + You should use the --verbose option to get a complete graph. xml Log check result as machine-readable XML file. sql Log check result as SQL script with INSERT commands. An example script to create the initial SQL table is included as create.sql. 
diff --git a/test.py b/test.py index 58788923..2fc004ed 100755 --- a/test.py +++ b/test.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.3 +#!/usr/bin/env python2.4 # # SchoolTool - common information systems platform for school administration # Copyright (c) 2003 Shuttleworth Foundation