updated caching

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1132 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-01-02 23:30:22 +00:00
parent c0c91b17d5
commit a17bf11f4b
11 changed files with 37 additions and 31 deletions

View file

@@ -407,6 +407,7 @@ class UrlData (object):
data = self.getCacheData()
for key in self.getCacheKeys():
self.config.urlCache_set(key, data)
self.config.urlSeen_set(key)
self.cached = True
@@ -417,6 +418,11 @@ class UrlData (object):
return [key]
def isCached (self):
    """Return True if this URL has already been handled in this run.

    True when either this object already flagged itself as cached
    (self.cached, set after storing its data in the URL cache) or the
    cache key is present in the shared "seen" cache on the config
    object (urlSeen_has_key).
    """
    # NOTE(review): getCacheKey() is called unconditionally, even when
    # self.cached is already True — presumably cheap; its body is not
    # fully visible here, so do not short-circuit without confirming.
    key = self.getCacheKey()
    return self.cached or self.config.urlSeen_has_key(key)
def getCacheKey (self):
# note: the host is already lowercase
if self.urlparts:
@@ -438,7 +444,7 @@ class UrlData (object):
return self.valid and \
self.isParseable() and \
self.hasContent() and \
not self.cached and \
not self.isCached() and \
(self.config["recursionlevel"] < 0 or
self.recursionLevel < self.config["recursionlevel"]) and \
not self.extern[0]

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('base1.html','base2.html', 'codebase.html'):
url = os.path.join(htmldir, file)
for filename in ('base1.html', 'base2.html', 'codebase.html'):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('file.html',"file.txt","file.asc"):
url = os.path.join(htmldir, file)
for filename in ('file.html', "file.txt", "file.asc"):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('frames.html',):
url = os.path.join(htmldir, file)
for filename in ('frames.html',):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('ftp.html',):
url = os.path.join(htmldir, file)
for filename in ('ftp.html',):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('http.html',):
url = os.path.join(htmldir, file)
for filename in ('http.html',):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('https.html',):
url = os.path.join(htmldir, file)
for filename in ('https.html',):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('mail.html',):
url = os.path.join(htmldir, file)
for filename in ('mail.html',):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('misc.html','anchor.html'):
url = os.path.join(htmldir, file)
for filename in ('misc.html','anchor.html'):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('news.html',):
url = os.path.join(htmldir, file)
for filename in ('news.html',):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)

View file

@@ -6,9 +6,9 @@ config['recursionlevel'] = True
config['log'] = config.newLogger('test')
config["anchors"] = True
config["verbose"] = True
config.disableThreading()
config.setThreads(0)
htmldir = "test/html"
for file in ('telnet.html',):
url = os.path.join(htmldir, file)
for filename in ('telnet.html',):
url = os.path.join(htmldir, filename)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config))
linkcheck.checkUrls(config)