From 59ffbd43f0080fb0d8545fcf87541ac848d22bad Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Sat, 7 Mar 2009 09:43:55 +0100 Subject: [PATCH] Use AttrDict for transport object in loggers. --- linkcheck/checker/urlbase.py | 37 +++++++++++++++++++++++------ linkcheck/gui/__init__.py | 2 +- linkcheck/logger/csvlog.py | 9 ++++--- linkcheck/logger/customxml.py | 15 +++++------- linkcheck/logger/graph.py | 4 ++-- linkcheck/logger/html.py | 4 ++-- linkcheck/logger/sql.py | 15 +++++------- linkcheck/logger/text.py | 5 ++-- tests/checker/__init__.py | 4 ++-- tests/checker/data/misc.html.result | 4 ++-- 10 files changed, 57 insertions(+), 42 deletions(-) diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index a8fc0f87..6beef3ec 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -31,7 +31,7 @@ import select from . import absolute_url, StoringHandler, get_url_from from ..cache import geoip from .. import (log, LOG_CHECK, LOG_CACHE, httputil, httplib2 as httplib, - strformat, LinkCheckerError, url as urlutil, trace, clamav) + strformat, LinkCheckerError, url as urlutil, trace, clamav, containers) from ..HtmlParser import htmlsax from ..htmlutil import linkparse, titleparse from .const import (WARN_URL_EFFECTIVE_URL, WARN_URL_UNICODE_DOMAIN, @@ -179,7 +179,11 @@ class UrlBase (object): """Return title of page the URL refers to. This is per default the filename or the URL.""" if self.title is None: - url = self.url if self.url else self.base_url + url = u"" + if self.base_url: + url = self.base_url + elif self.url: + url = self.url self.title = url if "/" in url: title = url.rsplit("/", 1)[1] @@ -251,8 +255,10 @@ class UrlBase (object): """ self.result = cache_data["result"] self.has_result = True - self.warnings.extend(cache_data["warnings"]) - self.info.extend(cache_data["info"]) + for tag, msg in cache_data["warnings"]: + self.add_warning(msg, tag=tag) + for info in cache_data["info"]: + self.add_info(info) self.valid = cache_data["valid"] self.dltime = cache_data["dltime"] self.dlsize = cache_data["dlsize"] @@ -954,7 +960,6 @@ class UrlBase (object): def to_wire (self): """Return a simplified transport object for logging. - XXX: at the moment, this returns just self. The transport object must contain these attributes: - url_data.valid: bool @@ -963,7 +968,7 @@ class UrlBase (object): Indicates if URL data has been loaded from cache. - url_data.result: unicode Result string - - url_data.warnings: list of (unicode, unicode) + - url_data.warnings: list of unicode List of tagged warnings for this URL. - url_data.name: unicode string or None name of URL (eg. filename or link name) @@ -986,7 +991,25 @@ class UrlBase (object): - url_data.column: int Column number of this URL at parent document, or -1 """ - return self + return containers.AttrDict(valid=self.valid, + extern=self.extern[0], + cached=self.cached, + result=self.result, + warnings=[x[1] for x in self.warnings], + name=self.name or u"", + title=self.get_title(), + parent_url=self.parent_url or u"", + base_ref=self.base_ref or u"", + base_url=self.base_url or u"", + url=self.url or u"", + checktime=self.checktime, + dltime=self.dltime, + dlsize=self.dlsize, + info=self.info, + line=self.line, + column=self.column, + cache_url_key=self.cache_url_key, + ) def filter_tidy_errors (errors): diff --git a/linkcheck/gui/__init__.py b/linkcheck/gui/__init__.py index dccd5ca6..d96abf2d 100644 --- a/linkcheck/gui/__init__.py +++ b/linkcheck/gui/__init__.py @@ -223,7 +223,7 @@ Version 2 or later.

item.setToolTip(2, url) item.setToolTip(3, name) if url_data.warnings: - text = u"\n".join([x[1] for x in url_data.warnings]) + text = u"\n".join(url_data.warnings) item.setToolTip(4, strformat.wrap(text, 60)) self.treeWidget.addTopLevelItem(item) self.num += 1 diff --git a/linkcheck/logger/csvlog.py b/linkcheck/logger/csvlog.py index 75004a51..54b28232 100644 --- a/linkcheck/logger/csvlog.py +++ b/linkcheck/logger/csvlog.py @@ -64,7 +64,6 @@ class CSVLogger (Logger): self.check_date() self.comment(_("Format of the entries:")) for s in (u"urlname", - u"recursionlevel", u"parentname", u"baseref", u"result", @@ -93,12 +92,12 @@ class CSVLogger (Logger): Write csv formatted url check info. """ row = [] - for s in (url_data.base_url or u"", url_data.recursion_level, - url_data.parent_url or u"", url_data.base_ref or u"", + for s in (url_data.base_url, + url_data.parent_url, url_data.base_ref, url_data.result, - os.linesep.join(x[1] for x in url_data.warnings), + os.linesep.join(url_data.warnings), os.linesep.join(url_data.info), - url_data.valid, url_data.url or u"", + url_data.valid, url_data.url, url_data.line, url_data.column, url_data.name, url_data.dltime, url_data.dlsize, url_data.checktime, diff --git a/linkcheck/logger/customxml.py b/linkcheck/logger/customxml.py index 32dd26bd..fa9793a2 100644 --- a/linkcheck/logger/customxml.py +++ b/linkcheck/logger/customxml.py @@ -42,22 +42,22 @@ class CustomXMLLogger (xmllog.XMLLogger): """ self.xml_starttag(u'urldata') if self.has_part('url'): - self.xml_tag(u"url", unicode(url_data.base_url or u"")) + self.xml_tag(u"url", unicode(url_data.base_url)) if url_data.name and self.has_part('name'): - self.xml_tag(u"name", unicode(url_data.name or u"")) + self.xml_tag(u"name", unicode(url_data.name)) if url_data.parent_url and self.has_part('parenturl'): attrs = { u'line': u"%d" % url_data.line, u'column': u"%d" % url_data.column, } - self.xml_tag(u"parent", unicode(url_data.parent_url or u""), + self.xml_tag(u"parent", unicode(url_data.parent_url), attrs=attrs) if url_data.base_ref and self.has_part('base'): self.xml_tag(u"baseref", unicode(url_data.base_ref)) if self.has_part("realurl"): self.xml_tag(u"realurl", unicode(url_data.url)) if self.has_part("extern"): - self.xml_tag(u"extern", u"%d" % (1 if url_data.extern[0] else 0)) + self.xml_tag(u"extern", u"%d" % (1 if url_data.extern else 0)) if url_data.dltime >= 0 and self.has_part("dltime"): self.xml_tag(u"dltime", u"%f" % url_data.dltime) if url_data.dlsize >= 0 and self.has_part("dlsize"): @@ -71,11 +71,8 @@ class CustomXMLLogger (xmllog.XMLLogger): self.xml_endtag(u"infos") if url_data.warnings and self.has_part('warning'): self.xml_starttag(u"warnings") - for tag, data in url_data.warnings: - attrs = {} - if tag is not None: - attrs["tag"] = tag - self.xml_tag(u"warning", data, attrs=attrs) + for data in url_data.warnings: + self.xml_tag(u"warning", data) self.xml_endtag(u"warnings") if self.has_part("result"): attrs = {} diff --git a/linkcheck/logger/graph.py b/linkcheck/logger/graph.py index 5ab76de3..373a2b3c 100644 --- a/linkcheck/logger/graph.py +++ b/linkcheck/logger/graph.py @@ -44,8 +44,8 @@ class GraphLogger (Logger): "url": url_data.url, "parent_url": url_data.parent_url, "id": self.nodeid, - "label": quote(u"%s (#%d)" % (url_data.get_title(), self.nodeid)), - "extern": 1 if url_data.extern[0] else 0, + "label": quote(u"%s (#%d)" % (url_data.title, self.nodeid)), + "extern": 1 if url_data.extern else 0, "checktime": url_data.checktime, "dlsize": url_data.dlsize, "dltime": url_data.dltime, diff --git a/linkcheck/logger/html.py b/linkcheck/logger/html.py index 0e52641b..0f70f585 100644 --- a/linkcheck/logger/html.py +++ b/linkcheck/logger/html.py @@ -165,7 +165,7 @@ class HtmlLogger (Logger): self.writeln(u'%s' % (self.colorurl, self.part("url"))) self.write(u'' % self.colorurl) - self.write(u"`%s'" % cgi.escape(url_data.base_url or u"")) + self.write(u"`%s'" % cgi.escape(url_data.base_url)) if url_data.cached: self.write(_(" (cached)")) self.writeln(u"") @@ -248,7 +248,7 @@ class HtmlLogger (Logger): Write url_data.warnings. """ sep = u"
"+os.linesep - text = sep.join(cgi.escape(x[1]) for x in url_data.warnings) + text = sep.join(cgi.escape(x) for x in url_data.warnings) self.writeln(u'' + self.part("warning") + u'' + diff --git a/linkcheck/logger/sql.py b/linkcheck/logger/sql.py index be9e1bac..5dae37c9 100644 --- a/linkcheck/logger/sql.py +++ b/linkcheck/logger/sql.py @@ -90,12 +90,10 @@ class SQLLogger (Logger): """ Store url check info into the database. """ - log_warnings = (x[1] for x in url_data.warnings) - self.writeln(u"insert into %(table)s(urlname,recursionlevel," + self.writeln(u"insert into %(table)s(urlname," "parentname,baseref,valid,result,warning,info,url,line,col," "name,checktime,dltime,dlsize,cached) values (" "%(base_url)s," - "%(recursion_level)d," "%(url_parent)s," "%(base_ref)s," "%(valid)d," @@ -112,15 +110,14 @@ class SQLLogger (Logger): "%(cached)d" ")%(separator)s" % {'table': self.dbname, - 'base_url': sqlify(url_data.base_url or u""), - 'recursion_level': url_data.recursion_level, - 'url_parent': sqlify((url_data.parent_url or u"")), - 'base_ref': sqlify((url_data.base_ref or u"")), + 'base_url': sqlify(url_data.base_url), + 'url_parent': sqlify((url_data.parent_url)), + 'base_ref': sqlify((url_data.base_ref)), 'valid': intify(url_data.valid), 'result': sqlify(url_data.result), - 'warning': sqlify(os.linesep.join(log_warnings)), + 'warning': sqlify(os.linesep.join(url_data.warnings)), 'info': sqlify(os.linesep.join(url_data.info)), - 'url': sqlify(urlutil.url_quote(url_data.url or u"")), + 'url': sqlify(urlutil.url_quote(url_data.url)), 'line': url_data.line, 'column': url_data.column, 'name': sqlify(url_data.name), diff --git a/linkcheck/logger/text.py b/linkcheck/logger/text.py index 53f2d34b..e4a3824c 100644 --- a/linkcheck/logger/text.py +++ b/linkcheck/logger/text.py @@ -142,7 +142,7 @@ class TextLogger (Logger): """ self.writeln() self.write(self.part('url') + self.spaces('url')) - txt = strformat.strline(url_data.base_url or u"") + txt = strformat.strline(url_data.base_url) if url_data.cached: txt += _(" (cached)") self.writeln(txt, color=self.colorurl) @@ -214,8 +214,7 @@ class TextLogger (Logger): Write url_data.warning. """ self.write(self.part("warning") + self.spaces("warning")) - text = [x[1] for x in url_data.warnings] - self.writeln(self.wrap(text, 65), color=self.colorwarning) + self.writeln(self.wrap(url_data.warnings, 65), color=self.colorwarning) def write_result (self, url_data): """ diff --git a/tests/checker/__init__.py b/tests/checker/__init__.py index 1ff42a60..0114047d 100644 --- a/tests/checker/__init__.py +++ b/tests/checker/__init__.py @@ -60,7 +60,7 @@ class TestLogger (linkcheck.logger.Logger): Append logger output to self.result. """ if self.has_part('url'): - url = u"url %s" % (url_data.base_url or u"") + url = u"url %s" % url_data.base_url if url_data.cached: url += u" (cached)" self.result.append(url) @@ -79,7 +79,7 @@ class TestLogger (linkcheck.logger.Logger): self.result.append(u"info %s" % info) if self.has_part('warning'): for warning in url_data.warnings: - self.result.append(u"warning %s" % warning[1]) + self.result.append(u"warning %s" % warning) if self.has_part('result'): self.result.append(u"valid" if url_data.valid else u"error") # note: do not append url_data.result since this is diff --git a/tests/checker/data/misc.html.result b/tests/checker/data/misc.html.result index ebbd7c98..5fc4a2fc 100644 --- a/tests/checker/data/misc.html.result +++ b/tests/checker/data/misc.html.result @@ -21,13 +21,13 @@ valid url http://imadööfus.org%%0D%%3Cfont%%20face=%%22Verdana,%%20Arial,%%20Helvetica,%%20sans-serif%%22%%20size=%%222%%22%%3E%%3Chttp://www.imadoofus.org%%3E%%20%%0D%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20 cache key None -real url None +real url name blubb error url cache key None -real url None +real url error url test.swf