mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-11 10:00:58 +00:00
Add modified field to loggers.
This commit is contained in:
parent
1db63227f6
commit
3a352631ba
14 changed files with 71 additions and 29 deletions
|
|
@ -19,5 +19,6 @@ create table linksdb (
|
|||
dltime int,
|
||||
dlsize int,
|
||||
cached int,
|
||||
level int not null
|
||||
level int not null,
|
||||
modified varchar(256)
|
||||
);
|
||||
|
|
|
|||
|
|
@ -119,6 +119,11 @@
|
|||
[gxml]
|
||||
#encoding=iso-8859-1
|
||||
|
||||
# Sitemap logger
|
||||
[sitemap]
|
||||
#priority=0.7
|
||||
#frequency=weekly
|
||||
|
||||
|
||||
##################### checking options ##########################
|
||||
[checking]
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ Changes:
|
|||
- logging: Print download and cache statistics in text output logger.
|
||||
- logging: Print warning tag in text output logger. Makes warning filtering
|
||||
easier.
|
||||
- logging: Make the last modification time a separate field in logging
|
||||
output. See doc/upgrading.txt for compatibility changes.
|
||||
|
||||
Fixes:
|
||||
- logging: Close logger properly on I/O errors.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,12 @@
|
|||
Upgrading
|
||||
=========
|
||||
Migrating from 8.0 to 8.1
|
||||
-------------------------
|
||||
All loggers have an additional output field "modified".
|
||||
If these loggers are not configured with specific output parts,
|
||||
the output format will change.
|
||||
For example existing SQL tables can be altered with:
|
||||
alter table linksdb add (modified varchar(256));
|
||||
|
||||
Migrating from 7.9 to 8.0
|
||||
-------------------------
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ import urllib
|
|||
import urllib2
|
||||
|
||||
from . import urlbase, get_index_html, get_url_from
|
||||
from .. import log, LOG_CHECK, fileutil, LinkCheckerError, url as urlutil
|
||||
from .. import log, LOG_CHECK, fileutil, strformat, LinkCheckerError, url as urlutil
|
||||
from ..bookmarks import firefox
|
||||
from .const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH
|
||||
|
||||
|
|
@ -148,14 +148,16 @@ class FileUrl (urlbase.UrlBase):
|
|||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
|
||||
def add_size_info (self):
|
||||
"""Get size of file content from filename path."""
|
||||
"""Get size of file content and modification time from filename path."""
|
||||
if self.is_directory():
|
||||
# Directory size always differs from the customer index.html
|
||||
# that is generated. So return without calculating any size.
|
||||
return
|
||||
self.size = fileutil.get_size(self.get_os_filename())
|
||||
filename = self.get_os_filename()
|
||||
self.size = fileutil.get_size(filename)
|
||||
if self.dlsize == -1:
|
||||
self.dlsize = self.size
|
||||
self.modified = strformat.strtime(fileutil.get_mtime(filename))
|
||||
|
||||
def check_connection (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -488,9 +488,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.set_result(u"%r %s" % (response.status, response.reason))
|
||||
else:
|
||||
self.set_result(_("OK"))
|
||||
modified = self.getheader('Last-Modified', u'')
|
||||
if modified:
|
||||
self.add_info(_("Last modified %(date)s.") % {"date": modified})
|
||||
self.modified = self.getheader('Last-Modified', u'')
|
||||
|
||||
def _try_http_response (self):
|
||||
"""Try to get a HTTP response object. For reused persistent
|
||||
|
|
|
|||
|
|
@ -184,6 +184,8 @@ class UrlBase (object):
|
|||
self.info = []
|
||||
# content size
|
||||
self.size = -1
|
||||
# last modification time of content in HTTP-date format as specified in RFC2616 chapter 3.3.1
|
||||
self.modified = u""
|
||||
# download time
|
||||
self.dltime = -1
|
||||
# download size
|
||||
|
|
@ -1196,6 +1198,8 @@ class UrlBase (object):
|
|||
MIME content type for URL content.
|
||||
- url_data.level: int
|
||||
Recursion level until reaching this URL from start URL
|
||||
- url_data.modified: unicode
|
||||
Last modification date of retrieved page (or empty).
|
||||
"""
|
||||
return dict(valid=self.valid,
|
||||
extern=self.extern[0],
|
||||
|
|
@ -1218,6 +1222,7 @@ class UrlBase (object):
|
|||
cache_url_key=self.cache_url_key,
|
||||
content_type=self.get_content_type(),
|
||||
level=self.recursion_level,
|
||||
modified=self.modified,
|
||||
)
|
||||
|
||||
def to_wire (self):
|
||||
|
|
@ -1249,6 +1254,7 @@ urlDataAttr = [
|
|||
'dltime',
|
||||
'dlsize',
|
||||
'info',
|
||||
'modified',
|
||||
'line',
|
||||
'column',
|
||||
'cache_url_key',
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ Fields = dict(
|
|||
checktime=_("Check time"),
|
||||
url=_("URL"),
|
||||
level=_("Level"),
|
||||
modified=_("Modified"),
|
||||
)
|
||||
del _
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,12 @@ import sys
|
|||
from . import Logger
|
||||
from .. import strformat
|
||||
|
||||
Columns = (
|
||||
u"urlname", u"parentname", u"baseref", u"result", u"warningstring",
|
||||
u"infostring", u"valid", u"url", u"line", u"column", u"name",
|
||||
u"dltime", u"dlsize", u"checktime", u"cached", u"level", u"modified",
|
||||
)
|
||||
|
||||
|
||||
class CSVLogger (Logger):
|
||||
"""
|
||||
|
|
@ -68,22 +74,7 @@ class CSVLogger (Logger):
|
|||
self.writer = csv.writer(self.fd, dialect='excel',
|
||||
delimiter=self.separator, lineterminator=self.linesep,
|
||||
quotechar=self.quotechar)
|
||||
for s in (u"urlname",
|
||||
u"parentname",
|
||||
u"baseref",
|
||||
u"result",
|
||||
u"warningstring",
|
||||
u"infostring",
|
||||
u"valid",
|
||||
u"url",
|
||||
u"line",
|
||||
u"column",
|
||||
u"name",
|
||||
u"dltime",
|
||||
u"dlsize",
|
||||
u"checktime",
|
||||
u"cached",
|
||||
u"level"):
|
||||
for s in Columns:
|
||||
if self.has_part(s):
|
||||
row.append(s)
|
||||
if row:
|
||||
|
|
@ -124,6 +115,8 @@ class CSVLogger (Logger):
|
|||
row.append(url_data.cached)
|
||||
if self.has_part("level"):
|
||||
row.append(url_data.level)
|
||||
if self.has_part("modified"):
|
||||
row.append(url_data.modified)
|
||||
self.writerow(map(strformat.unicode_safe, row))
|
||||
self.flush()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2000-2011 Bastian Kleineidam
|
||||
# Copyright (C) 2000-2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
|
@ -71,6 +71,8 @@ class CustomXMLLogger (xmllog.XMLLogger):
|
|||
for info in url_data.info:
|
||||
self.xml_tag(u"info", info)
|
||||
self.xml_endtag(u"infos")
|
||||
if url_data.modified and self.has_part('modified'):
|
||||
self.xml_tag(u"modified", url_data.modified)
|
||||
if url_data.warnings and self.has_part('warning'):
|
||||
self.xml_starttag(u"warnings")
|
||||
for tag, data in url_data.warnings:
|
||||
|
|
|
|||
|
|
@ -119,6 +119,8 @@ class HtmlLogger (Logger):
|
|||
self.write_checktime(url_data)
|
||||
if url_data.info and self.has_part("info"):
|
||||
self.write_info(url_data)
|
||||
if url_data.modified and self.has_part("modified"):
|
||||
self.write_modified(url_data)
|
||||
if url_data.warnings and self.has_part("warning"):
|
||||
self.write_warning(url_data)
|
||||
if self.has_part("result"):
|
||||
|
|
@ -217,6 +219,12 @@ class HtmlLogger (Logger):
|
|||
self.writeln(u'<tr><td valign="top">' + self.part("info")+
|
||||
u"</td><td>"+text+u"</td></tr>")
|
||||
|
||||
def write_modified(self, url_data):
    """Write a two-cell HTML table row for url_data.modified.

    The first cell holds the label returned by self.part("modified"),
    the second one the HTML-escaped modification string.
    """
    # Escape first, then look up the label (same order as before).
    escaped = cgi.escape(url_data.modified)
    label = self.part("modified")
    row = u'<tr><td valign="top">' + label + u"</td><td>" + escaped + u"</td></tr>"
    self.writeln(row)
|
||||
|
||||
def write_warning (self, url_data):
|
||||
"""Write url_data.warnings."""
|
||||
sep = u"<br>"+os.linesep
|
||||
|
|
|
|||
|
|
@ -91,9 +91,10 @@ class SitemapXmlLogger (xmllog.XMLLogger):
|
|||
priority = self.priority
|
||||
self.xml_starttag(u'url')
|
||||
self.xml_tag(u'loc', url_data.url)
|
||||
# use it when last modified is part of official URL
|
||||
#if url_data.last_modified:
|
||||
# self.xml_tag(u'lastmod', url_data.last_modified)
|
||||
if url_data.modified:
|
||||
modified = get_sitemap_modified(url_data.modified)
|
||||
if modified:
|
||||
self.xml_tag(u'lastmod', modified)
|
||||
self.xml_tag(u'changefreq', self.frequency)
|
||||
self.xml_tag(u'priority', "%.1f" % priority)
|
||||
self.xml_endtag(u'url')
|
||||
|
|
@ -106,3 +107,10 @@ class SitemapXmlLogger (xmllog.XMLLogger):
|
|||
self.xml_endtag(u"urlset")
|
||||
self.xml_end_output()
|
||||
self.close_fileoutput()
|
||||
|
||||
|
||||
def get_sitemap_modified(s):
    """Reformat UrlData modified string into sitemap format specified at
    http://www.w3.org/TR/NOTE-datetime.

    The input is expected to be an HTTP-date style string such as
    "Sun, 06 Nov 1994 08:49:37 GMT". The result is the same instant
    expressed in UTC as "YYYY-MM-DDThh:mm:ssZ".

    @param s: modification date string (may be empty)
    @return: the reformatted date, or None if s is empty or cannot be
      parsed as a date
    """
    # Function-scope imports keep this helper self-contained.
    import datetime
    from email.utils import parsedate_tz

    if not s:
        return None
    parsed = parsedate_tz(s)
    if parsed is None:
        return None
    # parsed[9] is the zone offset in seconds, or None when the string
    # carried no zone; HTTP dates are GMT, so treat a missing zone as UTC.
    offset = parsed[9] or 0
    try:
        moment = datetime.datetime(*parsed[:6]) - datetime.timedelta(seconds=offset)
    except (ValueError, OverflowError):
        # Out-of-range field values: report "unknown" instead of failing.
        return None
    # Format by hand; strftime() is not reliable for early years on all
    # platforms and Python versions.
    return u"%04d-%02d-%02dT%02d:%02d:%02dZ" % (
        moment.year, moment.month, moment.day,
        moment.hour, moment.minute, moment.second)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2000-2011 Bastian Kleineidam
|
||||
# Copyright (C) 2000-2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
|
@ -99,7 +99,8 @@ class SQLLogger (Logger):
|
|||
"%(dltime)d,"
|
||||
"%(dlsize)d,"
|
||||
"%(cached)d,"
|
||||
"%(level)d"
|
||||
"%(level)d,"
|
||||
"%(modified)s"
|
||||
")%(separator)s" %
|
||||
{'table': self.dbname,
|
||||
'base_url': sqlify(url_data.base_url),
|
||||
|
|
@ -119,6 +120,7 @@ class SQLLogger (Logger):
|
|||
'cached': intify(url_data.cached),
|
||||
'separator': self.separator,
|
||||
"level": url_data.level,
|
||||
"modified": url_data.modified,
|
||||
})
|
||||
self.flush()
|
||||
|
||||
|
|
|
|||
|
|
@ -110,6 +110,8 @@ class TextLogger (Logger):
|
|||
self.write_dlsize(url_data)
|
||||
if url_data.info and self.has_part('info'):
|
||||
self.write_info(url_data)
|
||||
if url_data.modified and self.has_part('modified'):
|
||||
self.write_modified(url_data)
|
||||
if url_data.warnings and self.has_part('warning'):
|
||||
self.write_warning(url_data)
|
||||
if self.has_part('result'):
|
||||
|
|
@ -176,6 +178,11 @@ class TextLogger (Logger):
|
|||
self.write(self.part("info") + self.spaces("info"))
|
||||
self.writeln(self.wrap(url_data.info, 65), color=self.colorinfo)
|
||||
|
||||
def write_modified(self, url_data):
    """Write the "modified" label and url_data.modified on one line.

    The label from self.part("modified") is followed by whatever
    self.spaces("modified") returns, then the modification string.
    """
    label = self.part("modified")
    padding = self.spaces("modified")
    self.write(label + padding)
    self.writeln(url_data.modified)
|
||||
|
||||
def write_warning (self, url_data):
|
||||
"""Write url_data.warning."""
|
||||
self.write(self.part("warning") + self.spaces("warning"))
|
||||
|
|
|
|||
Loading…
Reference in a new issue