mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-17 06:20:27 +00:00
Fix tests.
This commit is contained in:
parent
b8f8bdf5fc
commit
18a200d85f
9 changed files with 36 additions and 35 deletions
|
|
@ -492,7 +492,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
else:
|
||||
self.set_result(_("OK"))
|
||||
modified = rfc822.parsedate(self.getheader('Last-Modified', u''))
|
||||
self.modified = datetime.utcfromtimestamp(time.mktime(modified))
|
||||
if modified:
|
||||
self.modified = datetime.utcfromtimestamp(time.mktime(modified))
|
||||
|
||||
def _try_http_response (self):
|
||||
"""Try to get a HTTP response object. For reused persistent
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ class UrlBase (object):
|
|||
# content size
|
||||
self.size = -1
|
||||
# last modification time of content as a datetime object, or None if unknown
|
||||
self.modified = u""
|
||||
self.modified = None
|
||||
# download time
|
||||
self.dltime = -1
|
||||
# download size
|
||||
|
|
@ -1198,8 +1198,8 @@ class UrlBase (object):
|
|||
MIME content type for URL content.
|
||||
- url_data.level: int
|
||||
Recursion level until reaching this URL from start URL
|
||||
- url_data.last_modified: unicode
|
||||
Last modification date of retrieved page (or empty).
|
||||
- url_data.last_modified: datetime
|
||||
Last modification date of retrieved page (or None).
|
||||
"""
|
||||
return dict(valid=self.valid,
|
||||
extern=self.extern[0],
|
||||
|
|
|
|||
|
|
@ -437,6 +437,17 @@ class Logger (object):
|
|||
self.stats.addrinfo_stats = addrinfo_stats
|
||||
self.stats.downloaded_bytes = download_stats
|
||||
|
||||
def format_modified(self, modified, sep=" "):
|
||||
"""Format modification date if it's not None.
|
||||
@param modified: modification date
|
||||
@ptype modified: datetime or None
|
||||
@return: date in ISO format, with sep between the date and time parts, or empty string if modified is None
|
||||
@rtype: unicode
|
||||
"""
|
||||
if modified is not None:
|
||||
return modified.isoformat(sep)
|
||||
return u""
|
||||
|
||||
|
||||
# the standard URL logger implementations
|
||||
from .text import TextLogger
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class CSVLogger (Logger):
|
|||
if self.has_part("level"):
|
||||
row.append(url_data.level)
|
||||
if self.has_part("modified"):
|
||||
row.append(url_data.modified)
|
||||
row.append(self.format_modified(url_data.modified))
|
||||
self.writerow(map(strformat.unicode_safe, row))
|
||||
self.flush()
|
||||
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class CustomXMLLogger (xmllog.XMLLogger):
|
|||
self.xml_tag(u"info", info)
|
||||
self.xml_endtag(u"infos")
|
||||
if url_data.modified and self.has_part('modified'):
|
||||
self.xml_tag(u"modified", url_data.modified)
|
||||
self.xml_tag(u"modified", self.format_modified(url_data.modified))
|
||||
if url_data.warnings and self.has_part('warning'):
|
||||
self.xml_starttag(u"warnings")
|
||||
for tag, data in url_data.warnings:
|
||||
|
|
|
|||
|
|
@ -221,7 +221,7 @@ class HtmlLogger (Logger):
|
|||
|
||||
def write_modified(self, url_data):
|
||||
"""Write url_data.modified."""
|
||||
text = cgi.escape(url_data.modified.isoformat(" "))
|
||||
text = cgi.escape(self.format_modified(url_data.modified))
|
||||
self.writeln(u'<tr><td valign="top">' + self.part("modified") +
|
||||
u"</td><td>"+text+u"</td></tr>")
|
||||
|
||||
|
|
|
|||
|
|
@ -67,34 +67,33 @@ class SitemapXmlLogger (xmllog.XMLLogger):
|
|||
Update accounting data and determine if URL should be included in the sitemap.
|
||||
"""
|
||||
self.stats.log_url(url_data, do_print)
|
||||
# ignore the do_print flag and determine ourselves if we filter the url
|
||||
if (url_data.valid and
|
||||
url_data.url.startswith((u'http:', u'https:')) and
|
||||
url_data.url.startswith(self.prefix) and
|
||||
url_data.content_type in ('text/html', "application/xhtml+xml")):
|
||||
self.log_url(url_data)
|
||||
|
||||
def log_url (self, url_data):
|
||||
"""
|
||||
Log URL data in sitemap format.
|
||||
"""
|
||||
# initialize prefix and priority
|
||||
if self.prefix is None:
|
||||
# first URL (ie. the homepage) gets priority 1.0 per default
|
||||
self.prefix = url_data.url
|
||||
# first URL (ie. the homepage) gets priority 1.0 per default
|
||||
priority = 1.0
|
||||
else:
|
||||
# all other pages get priority 0.5 per default
|
||||
priority = 0.5
|
||||
if self.priority is not None:
|
||||
priority = self.priority
|
||||
# ignore the do_print flag and determine ourselves if we filter the url
|
||||
if (url_data.valid and
|
||||
url_data.url.startswith((u'http:', u'https:')) and
|
||||
url_data.url.startswith(self.prefix) and
|
||||
url_data.content_type in ('text/html', "application/xhtml+xml")):
|
||||
self.log_url(url_data, priority=priority)
|
||||
|
||||
def log_url (self, url_data, priority=None):
|
||||
"""
|
||||
Log URL data in sitemap format.
|
||||
"""
|
||||
self.xml_starttag(u'url')
|
||||
self.xml_tag(u'loc', url_data.url)
|
||||
if url_data.modified:
|
||||
modified = get_sitemap_modified(url_data.modified)
|
||||
if modified:
|
||||
self.xml_tag(u'lastmod', modified)
|
||||
self.xml_tag(u'lastmod', self.format_modified(url_data.modified, sep="T"))
|
||||
self.xml_tag(u'changefreq', self.frequency)
|
||||
self.xml_tag(u'priority', "%.1f" % priority)
|
||||
self.xml_tag(u'priority', "%.2f" % priority)
|
||||
self.xml_endtag(u'url')
|
||||
self.flush()
|
||||
|
||||
|
|
@ -106,13 +105,3 @@ class SitemapXmlLogger (xmllog.XMLLogger):
|
|||
self.xml_end_output()
|
||||
self.close_fileoutput()
|
||||
|
||||
|
||||
def get_sitemap_modified(modified):
|
||||
"""Reformat UrlData modified string into sitemap format specified at
|
||||
http://www.w3.org/TR/NOTE-datetime.
|
||||
@param modified: last modified time
|
||||
@ptype modified: datetime object with timezone information
|
||||
@return: formatted date
|
||||
@rtype: string
|
||||
"""
|
||||
return modified.isoformat('T')
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ class SQLLogger (Logger):
|
|||
'cached': intify(url_data.cached),
|
||||
'separator': self.separator,
|
||||
"level": url_data.level,
|
||||
"modified": url_data.modified,
|
||||
"modified": sqlify(self.format_modified(url_data.modified)),
|
||||
})
|
||||
self.flush()
|
||||
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ class TextLogger (Logger):
|
|||
def write_modified(self, url_data):
|
||||
"""Write url_data.modified."""
|
||||
self.write(self.part("modified") + self.spaces("modified"))
|
||||
self.writeln(url_data.modified.isoformat(" "))
|
||||
self.writeln(self.format_modified(url_data.modified))
|
||||
|
||||
def write_warning (self, url_data):
|
||||
"""Write url_data.warning."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue