Check syntax and cache before putting URL objects in the queue

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1277 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-03-04 12:17:38 +00:00
parent f0e9009b0e
commit d8e738c60b
2 changed files with 39 additions and 31 deletions

View file

@ -237,8 +237,14 @@ class Configuration (dict):
self.threader.finish()
def appendUrl (self, url):
    """Put url on the self.urls queue; nothing is returned."""
    pending = self.urls
    pending.put(url)
def appendUrl (self, urlData):
    """Put urlData on the self.urls queue if it passes both pre-checks.

    urlData.checkSyntax() is consulted first; urlData.checkCache() is
    only called when the syntax check returned a true value.  If either
    call returns a false value the object is not queued.  Nothing is
    returned in any case.
    """
    # `and` short-circuits, so the cache is never consulted for an
    # object whose syntax check already failed
    if urlData.checkSyntax() and urlData.checkCache():
        self.urls.put(urlData)
def getUrl (self):

View file

@ -311,35 +311,6 @@ class UrlData (object):
debug(BRING_IT_ON, "sleeping for", self.config['wait'], "seconds")
time.sleep(self.config['wait'])
t = time.time()
# check syntax
debug(BRING_IT_ON, "checking syntax")
if not self.urlName or self.urlName=="":
self.setError(i18n._("URL is null or empty"))
self.logMe()
return
if ws_at_start_or_end(self.urlName):
self.setError(i18n._("URL has whitespace at beginning or end"))
self.logMe()
return
try:
self.buildUrl()
self.extern = self._getExtern()
except tuple(ExcList):
value, tb = sys.exc_info()[1:]
debug(HURT_ME_PLENTY, "exception", traceback.format_tb(tb))
self.setError(str(value))
self.logMe()
return
# check the cache
debug(BRING_IT_ON, "checking cache")
for key in self.getCacheKeys():
if self.config.urlCache_has_key(key):
self.copyFromCache(self.config.urlCache_get(key))
self.cached = True
self.logMe()
return
# apply filter
debug(BRING_IT_ON, "extern =", self.extern)
if self.extern[0] and (self.config["strict"] or self.extern[1]):
@ -394,6 +365,37 @@ class UrlData (object):
self.putInCache()
def checkSyntax (self):
debug(BRING_IT_ON, "checking syntax")
if not self.urlName or self.urlName=="":
self.setError(i18n._("URL is null or empty"))
self.logMe()
return False
if ws_at_start_or_end(self.urlName):
self.setError(i18n._("URL has whitespace at beginning or end"))
self.logMe()
return False
try:
self.buildUrl()
self.extern = self._getExtern()
except LinkCheckerError, msg:
self.setError(str(msg))
self.logMe()
return False
return True
def checkCache (self):
    """Look for this object's cache keys in the config URL cache.

    The keys from self.getCacheKeys() are tried in order.  On the first
    key that self.config.urlCache_has_key() accepts, the cached entry is
    copied in with self.copyFromCache(), self.cached is set to True, the
    object is logged with self.logMe(), and False is returned.
    Returns True when none of the keys is in the cache.
    """
    debug(BRING_IT_ON, "checking cache")
    for cacheKey in self.getCacheKeys():
        if not self.config.urlCache_has_key(cacheKey):
            continue
        # first hit wins: reuse the cached result and stop searching
        self.copyFromCache(self.config.urlCache_get(cacheKey))
        self.cached = True
        self.logMe()
        return False
    return True
def closeConnection (self):
# brute force closing
if self.urlConnection is not None: