Fix tests.

This commit is contained in:
Bastian Kleineidam 2012-09-19 11:05:26 +02:00
parent b8f8bdf5fc
commit 18a200d85f
9 changed files with 36 additions and 35 deletions

View file

@ -492,7 +492,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
else:
self.set_result(_("OK"))
modified = rfc822.parsedate(self.getheader('Last-Modified', u''))
self.modified = datetime.utcfromtimestamp(time.mktime(modified))
if modified:
self.modified = datetime.utcfromtimestamp(time.mktime(modified))
def _try_http_response (self):
"""Try to get a HTTP response object. For reused persistent

View file

@ -185,7 +185,7 @@ class UrlBase (object):
# content size
self.size = -1
# last modification time of content in HTTP-date format as specified in RFC2616 chapter 3.3.1
self.modified = u""
self.modified = None
# download time
self.dltime = -1
# download size
@ -1198,8 +1198,8 @@ class UrlBase (object):
MIME content type for URL content.
- url_data.level: int
Recursion level until reaching this URL from start URL
- url_data.last_modified: unicode
Last modification date of retrieved page (or empty).
- url_data.last_modified: datetime
Last modification date of retrieved page (or None).
"""
return dict(valid=self.valid,
extern=self.extern[0],

View file

@ -437,6 +437,17 @@ class Logger (object):
self.stats.addrinfo_stats = addrinfo_stats
self.stats.downloaded_bytes = download_stats
def format_modified(self, modified, sep=" "):
    """Render a modification date in ISO format, tolerating None.

    @param modified: modification date
    @ptype modified: datetime or None
    @param sep: separator handed to isoformat(), placed between the
        date part and the time part
    @ptype sep: string
    @return: formatted date, or an empty string if modified is None
    @rtype: string
    """
    # Guard first: a missing modification date is rendered as empty text
    # so that callers can always treat the result as a string.
    if modified is None:
        return u""
    return modified.isoformat(sep)
# the standard URL logger implementations
from .text import TextLogger

View file

@ -116,7 +116,7 @@ class CSVLogger (Logger):
if self.has_part("level"):
row.append(url_data.level)
if self.has_part("modified"):
row.append(url_data.modified)
row.append(self.format_modified(url_data.modified))
self.writerow(map(strformat.unicode_safe, row))
self.flush()

View file

@ -72,7 +72,7 @@ class CustomXMLLogger (xmllog.XMLLogger):
self.xml_tag(u"info", info)
self.xml_endtag(u"infos")
if url_data.modified and self.has_part('modified'):
self.xml_tag(u"modified", url_data.modified)
self.xml_tag(u"modified", self.format_modified(url_data.modified))
if url_data.warnings and self.has_part('warning'):
self.xml_starttag(u"warnings")
for tag, data in url_data.warnings:

View file

@ -221,7 +221,7 @@ class HtmlLogger (Logger):
def write_modified(self, url_data):
"""Write url_data.modified."""
text = cgi.escape(url_data.modified.isoformat(" "))
text = cgi.escape(self.format_modified(url_data.modified))
self.writeln(u'<tr><td valign="top">' + self.part("modified") +
u"</td><td>"+text+u"</td></tr>")

View file

@ -67,34 +67,33 @@ class SitemapXmlLogger (xmllog.XMLLogger):
Update accounting data and determine if URL should be included in the sitemap.
"""
self.stats.log_url(url_data, do_print)
# ignore the do_print flag and determine ourselves if we filter the url
if (url_data.valid and
url_data.url.startswith((u'http:', u'https:')) and
url_data.url.startswith(self.prefix) and
url_data.content_type in ('text/html', "application/xhtml+xml")):
self.log_url(url_data)
def log_url (self, url_data):
"""
Log URL data in sitemap format.
"""
# initialize prefix and priority
if self.prefix is None:
# first URL (ie. the homepage) gets priority 1.0 per default
self.prefix = url_data.url
# first URL (ie. the homepage) gets priority 1.0 per default
priority = 1.0
else:
# all other pages get priority 0.5 per default
priority = 0.5
if self.priority is not None:
priority = self.priority
# ignore the do_print flag and determine ourselves if we filter the url
if (url_data.valid and
url_data.url.startswith((u'http:', u'https:')) and
url_data.url.startswith(self.prefix) and
url_data.content_type in ('text/html', "application/xhtml+xml")):
self.log_url(url_data, priority=priority)
def log_url (self, url_data, priority=None):
"""
Log URL data in sitemap format.
"""
self.xml_starttag(u'url')
self.xml_tag(u'loc', url_data.url)
if url_data.modified:
modified = get_sitemap_modified(url_data.modified)
if modified:
self.xml_tag(u'lastmod', modified)
self.xml_tag(u'lastmod', self.format_modified(url_data.modified, sep="T"))
self.xml_tag(u'changefreq', self.frequency)
self.xml_tag(u'priority', "%.1f" % priority)
self.xml_tag(u'priority', "%.2f" % priority)
self.xml_endtag(u'url')
self.flush()
@ -106,13 +105,3 @@ class SitemapXmlLogger (xmllog.XMLLogger):
self.xml_end_output()
self.close_fileoutput()
def get_sitemap_modified(modified):
    """Format a modification time for a sitemap entry.

    The result is the ISO representation of the given datetime with a
    "T" between the date part and the time part; see
    http://www.w3.org/TR/NOTE-datetime for the target format.
    @param modified: last modified time
    @ptype modified: datetime object
    @return: formatted date
    @rtype: string
    """
    date_time_separator = 'T'
    return modified.isoformat(date_time_separator)

View file

@ -120,7 +120,7 @@ class SQLLogger (Logger):
'cached': intify(url_data.cached),
'separator': self.separator,
"level": url_data.level,
"modified": url_data.modified,
"modified": sqlify(self.format_modified(url_data.modified)),
})
self.flush()

View file

@ -181,7 +181,7 @@ class TextLogger (Logger):
def write_modified(self, url_data):
"""Write url_data.modified."""
self.write(self.part("modified") + self.spaces("modified"))
self.writeln(url_data.modified.isoformat(" "))
self.writeln(self.format_modified(url_data.modified))
def write_warning (self, url_data):
"""Write url_data.warning."""