mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-05 13:14:46 +00:00
only check robots.txt for http
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1285 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
67fabd5d8e
commit
8584d5bc8e
3 changed files with 10 additions and 2 deletions
|
|
@@ -388,6 +388,10 @@ class HttpUrlData (ProxyUrlData):
|
|||
return True
|
||||
|
||||
|
||||
def isHttp (self):
|
||||
return True
|
||||
|
||||
|
||||
def isParseable (self):
|
||||
if not (self.valid and self.headers):
|
||||
return False
|
||||
|
|
|
|||
|
|
@@ -218,6 +218,10 @@ class UrlData (object):
|
|||
return False
|
||||
|
||||
|
||||
def isHttp (self):
|
||||
return False
|
||||
|
||||
|
||||
def setWarning (self, s):
|
||||
if self.warningString:
|
||||
self.warningString += "\n"+s
|
||||
|
|
@@ -455,7 +459,7 @@ class UrlData (object):
|
|||
|
||||
|
||||
def contentAllowsRobots (self):
|
||||
if not self.isHtml():
|
||||
if not self.isHttp():
|
||||
return True
|
||||
h = MetaRobotsFinder(self.getContent())
|
||||
p = htmlsax.parser(h)
|
||||
|
|
|
|||
|
|
@@ -88,7 +88,7 @@ def checkUrls (config):
|
|||
|
||||
|
||||
def printStatus (config, curtime, start_time):
|
||||
tocheck = config.urls.qsize()
|
||||
tocheck = len(config.urls)
|
||||
links = config['linknumber']
|
||||
active = config.threader.active_threads()
|
||||
duration = strduration(curtime - start_time)
|
||||
|
|
|
|||
Loading…
Reference in a new issue