mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Fix missing content types for cached URLs.
This commit is contained in:
parent
e8d5bbe4be
commit
fd3fe8dcaa
5 changed files with 23 additions and 8 deletions
|
|
@ -7,6 +7,7 @@ Fixes:
|
|||
internal links when given as start URL.
|
||||
- logging: Allow Unicode strings to be written to stdout without
|
||||
encoding errors on Unix systems.
|
||||
- logging: Fix missing content type for cached URLs.
|
||||
- gui: Reset statistics before each run.
|
||||
|
||||
Changes:
|
||||
|
|
|
|||
|
|
@ -255,9 +255,12 @@ class FileUrl (urlbase.UrlBase):
|
|||
self.aggregate.urlqueue.put(url_data)
|
||||
|
||||
def get_content_type (self):
|
||||
if self.url:
|
||||
return fileutil.guess_mimetype(self.url, read=self.get_content)
|
||||
return u""
|
||||
if self.content_type is None:
|
||||
if self.url:
|
||||
self.content_type = fileutil.guess_mimetype(self.url, read=self.get_content)
|
||||
else:
|
||||
self.content_type = u""
|
||||
return self.content_type
|
||||
|
||||
def get_intern_pattern (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -206,7 +206,9 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
getattr(self, "parse_"+key)()
|
||||
|
||||
def get_content_type (self, read=None):
|
||||
return fileutil.guess_mimetype(self.url, read=read)
|
||||
if self.content_type is None:
|
||||
self.content_type = fileutil.guess_mimetype(self.url, read=read)
|
||||
return self.content_type
|
||||
|
||||
def read_content (self):
|
||||
"""Return URL target content, or in case of directories a dummy HTML
|
||||
|
|
|
|||
|
|
@ -284,9 +284,12 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
|
||||
def get_content_type (self):
|
||||
"""Return content MIME type or empty string."""
|
||||
if self.headers:
|
||||
return headers.get_content_type(self.headers)
|
||||
return u""
|
||||
if self.content_type is None:
|
||||
if self.headers:
|
||||
self.content_type = headers.get_content_type(self.headers)
|
||||
else:
|
||||
self.content_type = u""
|
||||
return self.content_type
|
||||
|
||||
def follow_redirections (self, response, set_result=True):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -179,6 +179,8 @@ class UrlBase (object):
|
|||
self.title = None
|
||||
# flag if content should be checked or not
|
||||
self.do_check_content = True
|
||||
# MIME content type
|
||||
self.content_type = None
|
||||
|
||||
def set_result (self, msg, valid=True, overwrite=False):
|
||||
"""
|
||||
|
|
@ -295,6 +297,7 @@ class UrlBase (object):
|
|||
self.dltime = cache_data["dltime"]
|
||||
self.dlsize = cache_data["dlsize"]
|
||||
self.anchors = cache_data["anchors"]
|
||||
self.content_type = cache_data["content_type"]
|
||||
self.cached = True
|
||||
if anchor_changed and self.valid and self.anchor:
|
||||
# recheck anchor
|
||||
|
|
@ -312,6 +315,7 @@ class UrlBase (object):
|
|||
"dlsize": self.dlsize,
|
||||
"anchors": self.anchors,
|
||||
"anchor": self.anchor,
|
||||
"content_type": self.get_content_type(),
|
||||
}
|
||||
|
||||
def get_alias_cache_data (self):
|
||||
|
|
@ -654,7 +658,9 @@ class UrlBase (object):
|
|||
def get_content_type (self):
|
||||
"""Return content MIME type or empty string.
|
||||
Should be overridden in subclasses."""
|
||||
return u""
|
||||
if self.content_type is None:
|
||||
self.content_type = u""
|
||||
return self.content_type
|
||||
|
||||
def can_get_content (self):
|
||||
"""Indicate whether url get_content() can be called."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue