Do not check content of already cached URLs.

This commit is contained in:
Bastian Kleineidam 2010-10-27 19:31:36 +02:00
parent 1f81124dfa
commit f14340a0a8
3 changed files with 17 additions and 11 deletions

View file

@@ -1,6 +1,8 @@
5.5 "" (released xx.xx.2010)
Fixes:
- checking: Do not check content of already cached URLs.
Closes: SF bug #1720083
Changes:
- cmdline: Don't log a warning if URL has been redirected.

View file

@@ -270,6 +270,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
if tries == -1:
log.debug(LOG_CHECK, "already handled")
response.close()
self.do_check_content = False
return None
if tries >= self.max_redirects:
if self.method == "HEAD" and self.method_get_allowed:

View file

@@ -177,6 +177,8 @@ class UrlBase (object):
self.caching = True
# title is either the URL or parsed from content
self.title = None
# flag if content should be checked or not
self.do_check_content = True
def set_result (self, msg, valid=True, overwrite=False):
"""
@@ -481,17 +483,18 @@
value = _('Bad HTTP response %(line)r') % {"line": str(value)}
self.set_result(unicode_safe(value), valid=False)
self.checktime = time.time() - check_start
# check content and recursion
try:
self.check_content()
if self.allows_recursion():
self.parse_url()
# check content size
self.check_size()
except tuple(ExcList):
value = self.handle_exception()
self.add_warning(_("could not get content: %(msg)r") %
{"msg": str(value)}, tag=WARN_URL_ERROR_GETTING_CONTENT)
if self.do_check_content:
# check content and recursion
try:
self.check_content()
if self.allows_recursion():
self.parse_url()
# check content size
self.check_size()
except tuple(ExcList):
value = self.handle_exception()
self.add_warning(_("could not get content: %(msg)r") %
{"msg": str(value)}, tag=WARN_URL_ERROR_GETTING_CONTENT)
def close_connection (self):
"""