mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-19 05:41:00 +00:00
check syntax and cache before putting url objects in the queue
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1277 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
f0e9009b0e
commit
d8e738c60b
2 changed files with 39 additions and 31 deletions
|
|
@ -237,8 +237,14 @@ class Configuration (dict):
|
|||
self.threader.finish()
|
||||
|
||||
|
||||
def appendUrl (self, url):
|
||||
# Put the url object into the self.urls queue as-is; no checks are
# performed on it here.
self.urls.put(url)
|
||||
def appendUrl (self, urlData):
    """Put urlData into self.urls if it passes both pre-queue checks.

    urlData.checkSyntax() is called first; urlData.checkCache() is only
    called when the syntax check returned a true value. If either call
    returns a false value the object is not queued. Always returns None.
    """
    # "and" short-circuits, so the cache is never consulted for an
    # object that already failed the syntax check.
    if urlData.checkSyntax() and urlData.checkCache():
        self.urls.put(urlData)
|
||||
|
||||
|
||||
def getUrl (self):
|
||||
|
|
|
|||
|
|
@ -311,35 +311,6 @@ class UrlData (object):
|
|||
debug(BRING_IT_ON, "sleeping for", self.config['wait'], "seconds")
|
||||
time.sleep(self.config['wait'])
|
||||
t = time.time()
|
||||
# check syntax
|
||||
debug(BRING_IT_ON, "checking syntax")
|
||||
if not self.urlName or self.urlName=="":
|
||||
self.setError(i18n._("URL is null or empty"))
|
||||
self.logMe()
|
||||
return
|
||||
if ws_at_start_or_end(self.urlName):
|
||||
self.setError(i18n._("URL has whitespace at beginning or end"))
|
||||
self.logMe()
|
||||
return
|
||||
try:
|
||||
self.buildUrl()
|
||||
self.extern = self._getExtern()
|
||||
except tuple(ExcList):
|
||||
value, tb = sys.exc_info()[1:]
|
||||
debug(HURT_ME_PLENTY, "exception", traceback.format_tb(tb))
|
||||
self.setError(str(value))
|
||||
self.logMe()
|
||||
return
|
||||
|
||||
# check the cache
|
||||
debug(BRING_IT_ON, "checking cache")
|
||||
for key in self.getCacheKeys():
|
||||
if self.config.urlCache_has_key(key):
|
||||
self.copyFromCache(self.config.urlCache_get(key))
|
||||
self.cached = True
|
||||
self.logMe()
|
||||
return
|
||||
|
||||
# apply filter
|
||||
debug(BRING_IT_ON, "extern =", self.extern)
|
||||
if self.extern[0] and (self.config["strict"] or self.extern[1]):
|
||||
|
|
@ -394,6 +365,37 @@ class UrlData (object):
|
|||
self.putInCache()
|
||||
|
||||
|
||||
def checkSyntax (self):
|
||||
debug(BRING_IT_ON, "checking syntax")
|
||||
if not self.urlName or self.urlName=="":
|
||||
self.setError(i18n._("URL is null or empty"))
|
||||
self.logMe()
|
||||
return False
|
||||
if ws_at_start_or_end(self.urlName):
|
||||
self.setError(i18n._("URL has whitespace at beginning or end"))
|
||||
self.logMe()
|
||||
return False
|
||||
try:
|
||||
self.buildUrl()
|
||||
self.extern = self._getExtern()
|
||||
except LinkCheckerError, msg:
|
||||
self.setError(str(msg))
|
||||
self.logMe()
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def checkCache (self):
    """Look for this object's cache keys in the config's URL cache.

    The keys from self.getCacheKeys() are tried in order. On the first
    key for which self.config.urlCache_has_key() is true, the cached
    entry is copied in with self.copyFromCache(), self.cached is set to
    True, self.logMe() is called and False is returned.

    Returns True when none of the keys is in the cache. Note the
    polarity: True means "nothing was found".
    """
    debug(BRING_IT_ON, "checking cache")
    for cacheKey in self.getCacheKeys():
        if not self.config.urlCache_has_key(cacheKey):
            continue
        # first hit wins: take over the cached data and stop looking
        self.copyFromCache(self.config.urlCache_get(cacheKey))
        self.cached = True
        self.logMe()
        return False
    return True
|
||||
|
||||
|
||||
def closeConnection (self):
|
||||
# brute force closing
|
||||
if self.urlConnection is not None:
|
||||
|
|
|
|||
Loading…
Reference in a new issue