diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 62d30594..244fd441 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -492,7 +492,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): else: self.set_result(_("OK")) modified = rfc822.parsedate(self.getheader('Last-Modified', u'')) - self.modified = datetime.utcfromtimestamp(time.mktime(modified)) + if modified: + self.modified = datetime.utcfromtimestamp(time.mktime(modified)) def _try_http_response (self): """Try to get a HTTP response object. For reused persistent diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 37ccf04c..ce11b592 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -185,7 +185,7 @@ class UrlBase (object): # content size self.size = -1 # last modification time of content in HTTP-date format as specified in RFC2616 chapter 3.3.1 - self.modified = u"" + self.modified = None # download time self.dltime = -1 # download size @@ -1198,8 +1198,8 @@ class UrlBase (object): MIME content type for URL content. - url_data.level: int Recursion level until reaching this URL from start URL - - url_data.last_modified: unicode - Last modification date of retrieved page (or empty). + - url_data.last_modified: datetime + Last modification date of retrieved page (or None). """ return dict(valid=self.valid, extern=self.extern[0], diff --git a/linkcheck/logger/__init__.py b/linkcheck/logger/__init__.py index 6102253c..df43b90c 100644 --- a/linkcheck/logger/__init__.py +++ b/linkcheck/logger/__init__.py @@ -437,6 +437,17 @@ class Logger (object): self.stats.addrinfo_stats = addrinfo_stats self.stats.downloaded_bytes = download_stats + def format_modified(self, modified, sep=" "): + """Format modification date if it's not None. + @param modified: modification date + @ptype modified: datetime or None + @return: formatted date or empty string + @rtype: unicode + """ + if modified is not None: + return modified.isoformat(sep) + return u"" + # the standard URL logger implementations from .text import TextLogger diff --git a/linkcheck/logger/csvlog.py b/linkcheck/logger/csvlog.py index 30387e65..b8cdbfec 100644 --- a/linkcheck/logger/csvlog.py +++ b/linkcheck/logger/csvlog.py @@ -116,7 +116,7 @@ class CSVLogger (Logger): if self.has_part("level"): row.append(url_data.level) if self.has_part("modified"): - row.append(url_data.modified) + row.append(self.format_modified(url_data.modified)) self.writerow(map(strformat.unicode_safe, row)) self.flush() diff --git a/linkcheck/logger/customxml.py b/linkcheck/logger/customxml.py index d6b7d43b..1be55735 100644 --- a/linkcheck/logger/customxml.py +++ b/linkcheck/logger/customxml.py @@ -72,7 +72,7 @@ class CustomXMLLogger (xmllog.XMLLogger): self.xml_tag(u"info", info) self.xml_endtag(u"infos") if url_data.modified and self.has_part('modified'): - self.xml_tag(u"modified", url_data.modified) + self.xml_tag(u"modified", self.format_modified(url_data.modified)) if url_data.warnings and self.has_part('warning'): self.xml_starttag(u"warnings") for tag, data in url_data.warnings: diff --git a/linkcheck/logger/html.py b/linkcheck/logger/html.py index d30b1bbd..45b280e7 100644 --- a/linkcheck/logger/html.py +++ b/linkcheck/logger/html.py @@ -221,7 +221,7 @@ class HtmlLogger (Logger): def write_modified(self, url_data): """Write url_data.modified.""" - text = cgi.escape(url_data.modified.isoformat(" ")) + text = cgi.escape(self.format_modified(url_data.modified)) self.writeln(u'