Add modified field to loggers.

This commit is contained in:
Bastian Kleineidam 2012-09-18 12:12:00 +02:00
parent 1db63227f6
commit 3a352631ba
14 changed files with 71 additions and 29 deletions

View file

@ -19,5 +19,6 @@ create table linksdb (
dltime int,
dlsize int,
cached int,
level int not null
level int not null,
modified varchar(256)
);

View file

@ -119,6 +119,11 @@
[gxml]
#encoding=iso-8859-1
# Sitemap logger
[sitemap]
#priority=0.7
#frequency=weekly
##################### checking options ##########################
[checking]

View file

@ -14,6 +14,8 @@ Changes:
- logging: Print download and cache statistics in text output logger.
- logging: Print warning tag in text output logger. Makes warning filtering
easier.
- logging: Make the last modification time a separate field in logging
output. See doc/upgrading.txt for compatibility changes.
Fixes:
- logging: Close logger properly on I/O errors.

View file

@ -1,5 +1,12 @@
Upgrading
=========
Migrating from 8.0 to 8.1
-------------------------
All loggers have an additional output field "modified".
If these loggers are not configured with specific output parts,
the output format will change.
For example existing SQL tables can be altered with:
alter table linksdb add column modified varchar(256);
Migrating from 7.9 to 8.0
-------------------------

View file

@ -25,7 +25,7 @@ import urllib
import urllib2
from . import urlbase, get_index_html, get_url_from
from .. import log, LOG_CHECK, fileutil, LinkCheckerError, url as urlutil
from .. import log, LOG_CHECK, fileutil, strformat, LinkCheckerError, url as urlutil
from ..bookmarks import firefox
from .const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH
@ -148,14 +148,16 @@ class FileUrl (urlbase.UrlBase):
self.url = urlutil.urlunsplit(self.urlparts)
def add_size_info (self):
"""Get size of file content from filename path."""
"""Get size of file content and modification time from filename path."""
if self.is_directory():
# Directory size always differs from the custom index.html
# that is generated. So return without calculating any size.
return
self.size = fileutil.get_size(self.get_os_filename())
filename = self.get_os_filename()
self.size = fileutil.get_size(filename)
if self.dlsize == -1:
self.dlsize = self.size
self.modified = strformat.strtime(fileutil.get_mtime(filename))
def check_connection (self):
"""

View file

@ -488,9 +488,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.set_result(u"%r %s" % (response.status, response.reason))
else:
self.set_result(_("OK"))
modified = self.getheader('Last-Modified', u'')
if modified:
self.add_info(_("Last modified %(date)s.") % {"date": modified})
self.modified = self.getheader('Last-Modified', u'')
def _try_http_response (self):
"""Try to get a HTTP response object. For reused persistent

View file

@ -184,6 +184,8 @@ class UrlBase (object):
self.info = []
# content size
self.size = -1
# last modification time of content in HTTP-date format as specified in RFC2616 chapter 3.3.1
self.modified = u""
# download time
self.dltime = -1
# download size
@ -1196,6 +1198,8 @@ class UrlBase (object):
MIME content type for URL content.
- url_data.level: int
Recursion level until reaching this URL from start URL
- url_data.modified: unicode
Last modification date of retrieved page (or empty).
"""
return dict(valid=self.valid,
extern=self.extern[0],
@ -1218,6 +1222,7 @@ class UrlBase (object):
cache_url_key=self.cache_url_key,
content_type=self.get_content_type(),
level=self.recursion_level,
modified=self.modified,
)
def to_wire (self):
@ -1249,6 +1254,7 @@ urlDataAttr = [
'dltime',
'dlsize',
'info',
'modified',
'line',
'column',
'cache_url_key',

View file

@ -42,6 +42,7 @@ Fields = dict(
checktime=_("Check time"),
url=_("URL"),
level=_("Level"),
modified=_("Modified"),
)
del _

View file

@ -23,6 +23,12 @@ import sys
from . import Logger
from .. import strformat
Columns = (
u"urlname", u"parentname", u"baseref", u"result", u"warningstring",
u"infostring", u"valid", u"url", u"line", u"column", u"name",
u"dltime", u"dlsize", u"checktime", u"cached", u"level", u"modified",
)
class CSVLogger (Logger):
"""
@ -68,22 +74,7 @@ class CSVLogger (Logger):
self.writer = csv.writer(self.fd, dialect='excel',
delimiter=self.separator, lineterminator=self.linesep,
quotechar=self.quotechar)
for s in (u"urlname",
u"parentname",
u"baseref",
u"result",
u"warningstring",
u"infostring",
u"valid",
u"url",
u"line",
u"column",
u"name",
u"dltime",
u"dlsize",
u"checktime",
u"cached",
u"level"):
for s in Columns:
if self.has_part(s):
row.append(s)
if row:
@ -124,6 +115,8 @@ class CSVLogger (Logger):
row.append(url_data.cached)
if self.has_part("level"):
row.append(url_data.level)
if self.has_part("modified"):
row.append(url_data.modified)
self.writerow(map(strformat.unicode_safe, row))
self.flush()

View file

@ -1,5 +1,5 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2011 Bastian Kleineidam
# Copyright (C) 2000-2012 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -71,6 +71,8 @@ class CustomXMLLogger (xmllog.XMLLogger):
for info in url_data.info:
self.xml_tag(u"info", info)
self.xml_endtag(u"infos")
if url_data.modified and self.has_part('modified'):
self.xml_tag(u"modified", url_data.modified)
if url_data.warnings and self.has_part('warning'):
self.xml_starttag(u"warnings")
for tag, data in url_data.warnings:

View file

@ -119,6 +119,8 @@ class HtmlLogger (Logger):
self.write_checktime(url_data)
if url_data.info and self.has_part("info"):
self.write_info(url_data)
if url_data.modified and self.has_part("modified"):
self.write_modified(url_data)
if url_data.warnings and self.has_part("warning"):
self.write_warning(url_data)
if self.has_part("result"):
@ -217,6 +219,12 @@ class HtmlLogger (Logger):
self.writeln(u'<tr><td valign="top">' + self.part("info")+
u"</td><td>"+text+u"</td></tr>")
def write_modified(self, url_data):
    """Emit the modification time from url_data.modified as an HTML
    table row (label cell plus escaped value cell)."""
    escaped = cgi.escape(url_data.modified)
    label = self.part("modified")
    row = (u'<tr><td valign="top">' + label +
           u"</td><td>" + escaped + u"</td></tr>")
    self.writeln(row)
def write_warning (self, url_data):
"""Write url_data.warnings."""
sep = u"<br>"+os.linesep

View file

@ -91,9 +91,10 @@ class SitemapXmlLogger (xmllog.XMLLogger):
priority = self.priority
self.xml_starttag(u'url')
self.xml_tag(u'loc', url_data.url)
# use it when last modified is part of official URL
#if url_data.last_modified:
# self.xml_tag(u'lastmod', url_data.last_modified)
if url_data.modified:
modified = get_sitemap_modified(url_data.modified)
if modified:
self.xml_tag(u'lastmod', modified)
self.xml_tag(u'changefreq', self.frequency)
self.xml_tag(u'priority', "%.1f" % priority)
self.xml_endtag(u'url')
@ -106,3 +107,10 @@ class SitemapXmlLogger (xmllog.XMLLogger):
self.xml_endtag(u"urlset")
self.xml_end_output()
self.close_fileoutput()
def get_sitemap_modified(s):
    """Reformat an RFC 2616 HTTP-date string (as stored in
    UrlData.modified) into the W3C datetime format required for the
    sitemap <lastmod> tag, specified at
    http://www.w3.org/TR/NOTE-datetime.

    @param s: modification date, e.g. "Tue, 15 Nov 1994 12:45:26 GMT"
    @return: UTC timestamp like "1994-11-15T12:45:26Z", or None if the
      string could not be parsed
    """
    # Local imports keep this helper self-contained.
    import email.utils
    import time
    try:
        # parsedate_tz() handles RFC 822/2616 date formats and returns
        # None on unparsable input, making mktime_tz() raise TypeError.
        timestamp = email.utils.mktime_tz(email.utils.parsedate_tz(s))
    except (TypeError, ValueError, OverflowError):
        return None
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(timestamp))

View file

@ -1,5 +1,5 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2011 Bastian Kleineidam
# Copyright (C) 2000-2012 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -99,7 +99,8 @@ class SQLLogger (Logger):
"%(dltime)d,"
"%(dlsize)d,"
"%(cached)d,"
"%(level)d"
"%(level)d,"
"%(modified)s"
")%(separator)s" %
{'table': self.dbname,
'base_url': sqlify(url_data.base_url),
@ -119,6 +120,7 @@ class SQLLogger (Logger):
'cached': intify(url_data.cached),
'separator': self.separator,
"level": url_data.level,
"modified": url_data.modified,
})
self.flush()

View file

@ -110,6 +110,8 @@ class TextLogger (Logger):
self.write_dlsize(url_data)
if url_data.info and self.has_part('info'):
self.write_info(url_data)
if url_data.modified and self.has_part('modified'):
self.write_modified(url_data)
if url_data.warnings and self.has_part('warning'):
self.write_warning(url_data)
if self.has_part('result'):
@ -176,6 +178,11 @@ class TextLogger (Logger):
self.write(self.part("info") + self.spaces("info"))
self.writeln(self.wrap(url_data.info, 65), color=self.colorinfo)
def write_modified(self, url_data):
    """Write the modification time stored in url_data.modified,
    prefixed with the field label and alignment spacing."""
    prefix = self.part("modified") + self.spaces("modified")
    self.write(prefix)
    self.writeln(url_data.modified)
def write_warning (self, url_data):
"""Write url_data.warning."""
self.write(self.part("warning") + self.spaces("warning"))