recheck robots.txt allowance after redirect

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2002 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-11-18 00:57:35 +00:00
parent 89616a4bba
commit d2a6f4ed95

View file

@@ -250,6 +250,11 @@ class HttpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
self.add_info(
_("outside of domain filter, checked only syntax"))
return -1, response
# check robots.txt allowance again
if not self.allows_robots(redirected):
self.add_warning(
_("Access denied by robots.txt, checked only syntax"))
return -1, response
# see about recursive redirect
all_seen = self.aliases + [self.cache_url_key]
if redirected in all_seen: