mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
don't check one url multiple times
This commit is contained in:
parent
bf66006375
commit
eaa538c814
2 changed files with 9 additions and 2 deletions
4
linkcheck/cache/results.py
vendored
4
linkcheck/cache/results.py
vendored
|
|
@ -59,6 +59,10 @@ class ResultCache(object):
|
|||
"""Non-thread-safe function for fast containment checks."""
|
||||
return key in self.cache
|
||||
|
||||
def has_non_empty_result(self, key):
|
||||
"""Non-thread-safe function for fast containment checks."""
|
||||
return self.cache.get(key)
|
||||
|
||||
def __len__(self):
|
||||
"""Get number of cached elements. This is not thread-safe and is
|
||||
likely to change before the returned value is used."""
|
||||
|
|
|
|||
7
linkcheck/cache/urlqueue.py
vendored
7
linkcheck/cache/urlqueue.py
vendored
|
|
@ -120,7 +120,9 @@ class UrlQueue (object):
|
|||
log.debug(LOG_CACHE, "queueing %s", url_data.url)
|
||||
key = url_data.cache_url
|
||||
cache = url_data.aggregate.result_cache
|
||||
if url_data.has_result or cache.has_result(key):
|
||||
if cache.has_result(key):
|
||||
return
|
||||
if url_data.has_result:
|
||||
self.queue.appendleft(url_data)
|
||||
else:
|
||||
assert key is not None, "no result for None key: %s" % url_data
|
||||
|
|
@ -131,6 +133,7 @@ class UrlQueue (object):
|
|||
self.cleanup()
|
||||
self.queue.append(url_data)
|
||||
self.unfinished_tasks += 1
|
||||
cache.add_result(key, None) # add none value to cache to prevent checking this url multiple times
|
||||
|
||||
def cleanup(self):
|
||||
"""Move cached elements to top."""
|
||||
|
|
@ -139,7 +142,7 @@ class UrlQueue (object):
|
|||
for i, url_data in enumerate(self.queue):
|
||||
key = url_data.cache_url
|
||||
cache = url_data.aggregate.result_cache
|
||||
if cache.has_result(key):
|
||||
if cache.has_non_empty_result(key):
|
||||
cached.append(i)
|
||||
for pos in cached:
|
||||
self._move_to_top(pos)
|
||||
|
|
|
|||
Loading…
Reference in a new issue