only check robots.txt for http

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1285 e7d03fd6-7b0d-0410-9947-9c21f3af8025
calvin 2004-04-03 16:34:58 +00:00
parent 67fabd5d8e
commit 8584d5bc8e
3 changed files with 10 additions and 2 deletions
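The commit message is the whole rationale: robots.txt is an HTTP convention, so consulting it for ftp:, mailto:, or file: URLs is meaningless. A minimal sketch of that rule (not LinkChecker's actual code; it uses Python 3's stdlib urllib.robotparser, and the function name and user agent are illustrative):

from urllib import robotparser
from urllib.parse import urlsplit

def robots_txt_allows(url, user_agent="LinkChecker"):
    # robots.txt only exists for HTTP(S); every other scheme is allowed.
    parts = urlsplit(url)
    if parts.scheme not in ("http", "https"):
        return True
    rp = robotparser.RobotFileParser()
    rp.set_url("%s://%s/robots.txt" % (parts.scheme, parts.netloc))
    rp.read()  # network fetch of the robots.txt file
    return rp.can_fetch(user_agent, url)

print(robots_txt_allows("mailto:calvin@example.com"))  # True, no fetch needed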


@@ -388,6 +388,10 @@ class HttpUrlData (ProxyUrlData):
         return True

+    def isHttp (self):
+        return True
+
     def isParseable (self):
         if not (self.valid and self.headers):
             return False
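This hunk gives HttpUrlData an isHttp() override returning True; the matching base-class default (added in the next hunk) returns False, so callers can ask any UrlData object about its scheme without isinstance checks. A self-contained sketch of the pattern, with the class and method names taken from the diff and everything else illustrative:

class UrlData(object):
    def isHttp(self):
        # Default: most URL types are not HTTP.
        return False

class HttpUrlData(UrlData):
    def isHttp(self):
        # Override: this subclass handles http:// URLs.
        return True

for url_data in (UrlData(), HttpUrlData()):
    print(type(url_data).__name__, url_data.isHttp())
# UrlData False
# HttpUrlData True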


@@ -218,6 +218,10 @@ class UrlData (object):
         return False

+    def isHttp (self):
+        return False
+
     def setWarning (self, s):
         if self.warningString:
             self.warningString += "\n"+s
@@ -455,7 +459,7 @@ class UrlData (object):
     def contentAllowsRobots (self):
-        if not self.isHtml():
+        if not self.isHttp():
             return True
         h = MetaRobotsFinder(self.getContent())
         p = htmlsax.parser(h)
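contentAllowsRobots() now bails out early for non-HTTP URLs before handing the page to MetaRobotsFinder. The real finder is built on LinkChecker's internal htmlsax parser; the sketch below only approximates what it presumably does (scan for a meta robots tag and report whether the page forbids following links) using the Python 3 stdlib:

from html.parser import HTMLParser

class MetaRobotsFinder(HTMLParser):
    # Stand-in for LinkChecker's MetaRobotsFinder, not the real thing.
    follow = True

    def handle_starttag(self, tag, attrs):
        d = dict(attrs)
        if tag == "meta" and (d.get("name") or "").lower() == "robots":
            self.follow = "nofollow" not in (d.get("content") or "").lower()

def content_allows_robots(content):
    finder = MetaRobotsFinder()
    finder.feed(content)
    return finder.follow

print(content_allows_robots('<meta name="robots" content="nofollow">'))  # False
print(content_allows_robots('<p>no robots meta tag here</p>'))           # True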


@@ -88,7 +88,7 @@ def checkUrls (config):
 def printStatus (config, curtime, start_time):
-    tocheck = config.urls.qsize()
+    tocheck = len(config.urls)
     links = config['linknumber']
     active = config.threader.active_threads()
     duration = strduration(curtime - start_time)
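The third file swaps Queue.qsize() for len(), which suggests config.urls changed from a Queue.Queue to a plain list-like container (that change itself is not shown in this diff). The difference in a nutshell, assuming nothing else about the surrounding code:

import queue

q = queue.Queue()
q.put("http://example.com/")
print(q.qsize())  # 1; Queue has no __len__, so len(q) would raise TypeError

urls = ["http://example.com/"]
print(len(urls))  # 1; plain sequences support len() directly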