Another fix to ensure get_content() is only called when allowed.

This commit is contained in:
Bastian Kleineidam 2010-10-13 22:14:43 +02:00
parent 4c62b00e30
commit a8aa3bdb00
2 changed files with 18 additions and 11 deletions

View file

@ -651,6 +651,10 @@ Use URL `%(newurl)s' instead for checking.""") % {
return False
return super(HttpUrl, self).content_allows_robots()
def check_warningregex (self):
    """
    Run the inherited warning regex check only if GET is allowed.

    The inherited check reads the URL content via get_content(), so
    it is skipped entirely when self.method_get_allowed is false.
    """
    if not self.method_get_allowed:
        # Content must not be fetched for this URL; nothing to check.
        return
    super(HttpUrl, self).check_warningregex()
def is_html (self):
"""
See if this URL points to a HTML file by looking at the

View file

@ -679,17 +679,7 @@ class UrlBase (object):
self.get_anchors()
if self.anchor:
self.check_anchor()
warningregex = self.aggregate.config["warningregex"]
if warningregex:
log.debug(LOG_CHECK, "checking content")
try:
match = warningregex.search(self.get_content())
if match:
self.add_warning(_("Found %(match)r in link contents.") %
{"match": match.group()}, tag=WARN_URL_WARNREGEX_FOUND)
except tuple(ExcList):
value = self.handle_exception()
self.set_result(unicode_safe(value), valid=False)
self.check_warningregex()
# is it an intern URL?
if not self.extern[0]:
# check HTML/CSS syntax
@ -705,6 +695,19 @@ class UrlBase (object):
if self.aggregate.config["scanvirus"]:
self.scan_virus()
def check_warningregex (self):
    """
    Search the URL content for the configured warning regex.

    Does nothing when the "warningregex" config entry is unset. On a
    match, a warning tagged WARN_URL_WARNREGEX_FOUND is added that
    quotes the matched text. Any exception listed in ExcList raised
    while reading or searching the content marks the URL as invalid.
    """
    pattern = self.aggregate.config["warningregex"]
    if not pattern:
        return
    log.debug(LOG_CHECK, "checking content")
    try:
        found = pattern.search(self.get_content())
        if found:
            message = _("Found %(match)r in link contents.") % \
                {"match": found.group()}
            self.add_warning(message, tag=WARN_URL_WARNREGEX_FOUND)
    except tuple(ExcList):
        # Record the failure as the check result instead of propagating.
        value = self.handle_exception()
        self.set_result(unicode_safe(value), valid=False)
def check_size (self):
"""Check content size if it is zero or larger than a given
maximum size.