diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index a8fc0f87..6beef3ec 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -31,7 +31,7 @@ import select from . import absolute_url, StoringHandler, get_url_from from ..cache import geoip from .. import (log, LOG_CHECK, LOG_CACHE, httputil, httplib2 as httplib, - strformat, LinkCheckerError, url as urlutil, trace, clamav) + strformat, LinkCheckerError, url as urlutil, trace, clamav, containers) from ..HtmlParser import htmlsax from ..htmlutil import linkparse, titleparse from .const import (WARN_URL_EFFECTIVE_URL, WARN_URL_UNICODE_DOMAIN, @@ -179,7 +179,11 @@ class UrlBase (object): """Return title of page the URL refers to. This is per default the filename or the URL.""" if self.title is None: - url = self.url if self.url else self.base_url + url = u"" + if self.base_url: + url = self.base_url + elif self.url: + url = self.url self.title = url if "/" in url: title = url.rsplit("/", 1)[1] @@ -251,8 +255,10 @@ class UrlBase (object): """ self.result = cache_data["result"] self.has_result = True - self.warnings.extend(cache_data["warnings"]) - self.info.extend(cache_data["info"]) + for tag, msg in cache_data["warnings"]: + self.add_warning(msg, tag=tag) + for info in cache_data["info"]: + self.add_info(info) self.valid = cache_data["valid"] self.dltime = cache_data["dltime"] self.dlsize = cache_data["dlsize"] @@ -954,7 +960,6 @@ class UrlBase (object): def to_wire (self): """Return a simplified transport object for logging. - XXX: at the moment, this returns just self. The transport object must contain these attributes: - url_data.valid: bool @@ -963,7 +968,7 @@ class UrlBase (object): Indicates if URL data has been loaded from cache. - url_data.result: unicode Result string - - url_data.warnings: list of (unicode, unicode) + - url_data.warnings: list of unicode List of tagged warnings for this URL. - url_data.name: unicode string or None name of URL (eg. filename or link name) @@ -986,7 +991,25 @@ class UrlBase (object): - url_data.column: int Column number of this URL at parent document, or -1 """ - return self + return containers.AttrDict(valid=self.valid, + extern=self.extern[0], + cached=self.cached, + result=self.result, + warnings=[x[1] for x in self.warnings], + name=self.name or u"", + title=self.get_title(), + parent_url=self.parent_url or u"", + base_ref=self.base_ref or u"", + base_url=self.base_url or u"", + url=self.url or u"", + checktime=self.checktime, + dltime=self.dltime, + dlsize=self.dlsize, + info=self.info, + line=self.line, + column=self.column, + cache_url_key=self.cache_url_key, + ) def filter_tidy_errors (errors): diff --git a/linkcheck/gui/__init__.py b/linkcheck/gui/__init__.py index dccd5ca6..d96abf2d 100644 --- a/linkcheck/gui/__init__.py +++ b/linkcheck/gui/__init__.py @@ -223,7 +223,7 @@ Version 2 or later.
item.setToolTip(2, url) item.setToolTip(3, name) if url_data.warnings: - text = u"\n".join([x[1] for x in url_data.warnings]) + text = u"\n".join(url_data.warnings) item.setToolTip(4, strformat.wrap(text, 60)) self.treeWidget.addTopLevelItem(item) self.num += 1 diff --git a/linkcheck/logger/csvlog.py b/linkcheck/logger/csvlog.py index 75004a51..54b28232 100644 --- a/linkcheck/logger/csvlog.py +++ b/linkcheck/logger/csvlog.py @@ -64,7 +64,6 @@ class CSVLogger (Logger): self.check_date() self.comment(_("Format of the entries:")) for s in (u"urlname", - u"recursionlevel", u"parentname", u"baseref", u"result", @@ -93,12 +92,12 @@ class CSVLogger (Logger): Write csv formatted url check info. """ row = [] - for s in (url_data.base_url or u"", url_data.recursion_level, - url_data.parent_url or u"", url_data.base_ref or u"", + for s in (url_data.base_url, + url_data.parent_url, url_data.base_ref, url_data.result, - os.linesep.join(x[1] for x in url_data.warnings), + os.linesep.join(url_data.warnings), os.linesep.join(url_data.info), - url_data.valid, url_data.url or u"", + url_data.valid, url_data.url, url_data.line, url_data.column, url_data.name, url_data.dltime, url_data.dlsize, url_data.checktime, diff --git a/linkcheck/logger/customxml.py b/linkcheck/logger/customxml.py index 32dd26bd..fa9793a2 100644 --- a/linkcheck/logger/customxml.py +++ b/linkcheck/logger/customxml.py @@ -42,22 +42,22 @@ class CustomXMLLogger (xmllog.XMLLogger): """ self.xml_starttag(u'urldata') if self.has_part('url'): - self.xml_tag(u"url", unicode(url_data.base_url or u"")) + self.xml_tag(u"url", unicode(url_data.base_url)) if url_data.name and self.has_part('name'): - self.xml_tag(u"name", unicode(url_data.name or u"")) + self.xml_tag(u"name", unicode(url_data.name)) if url_data.parent_url and self.has_part('parenturl'): attrs = { u'line': u"%d" % url_data.line, u'column': u"%d" % url_data.column, } - self.xml_tag(u"parent", unicode(url_data.parent_url or u""), + self.xml_tag(u"parent", unicode(url_data.parent_url), attrs=attrs) if url_data.base_ref and self.has_part('base'): self.xml_tag(u"baseref", unicode(url_data.base_ref)) if self.has_part("realurl"): self.xml_tag(u"realurl", unicode(url_data.url)) if self.has_part("extern"): - self.xml_tag(u"extern", u"%d" % (1 if url_data.extern[0] else 0)) + self.xml_tag(u"extern", u"%d" % (1 if url_data.extern else 0)) if url_data.dltime >= 0 and self.has_part("dltime"): self.xml_tag(u"dltime", u"%f" % url_data.dltime) if url_data.dlsize >= 0 and self.has_part("dlsize"): @@ -71,11 +71,8 @@ class CustomXMLLogger (xmllog.XMLLogger): self.xml_endtag(u"infos") if url_data.warnings and self.has_part('warning'): self.xml_starttag(u"warnings") - for tag, data in url_data.warnings: - attrs = {} - if tag is not None: - attrs["tag"] = tag - self.xml_tag(u"warning", data, attrs=attrs) + for data in url_data.warnings: + self.xml_tag(u"warning", data) self.xml_endtag(u"warnings") if self.has_part("result"): attrs = {} diff --git a/linkcheck/logger/graph.py b/linkcheck/logger/graph.py index 5ab76de3..373a2b3c 100644 --- a/linkcheck/logger/graph.py +++ b/linkcheck/logger/graph.py @@ -44,8 +44,8 @@ class GraphLogger (Logger): "url": url_data.url, "parent_url": url_data.parent_url, "id": self.nodeid, - "label": quote(u"%s (#%d)" % (url_data.get_title(), self.nodeid)), - "extern": 1 if url_data.extern[0] else 0, + "label": quote(u"%s (#%d)" % (url_data.title, self.nodeid)), + "extern": 1 if url_data.extern else 0, "checktime": url_data.checktime, "dlsize": url_data.dlsize, "dltime": url_data.dltime, diff --git a/linkcheck/logger/html.py b/linkcheck/logger/html.py index 0e52641b..0f70f585 100644 --- a/linkcheck/logger/html.py +++ b/linkcheck/logger/html.py @@ -165,7 +165,7 @@ class HtmlLogger (Logger): self.writeln(u'