mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-18 21:31:00 +00:00
Don't check one URL multiple times
This commit is contained in:
parent
bf66006375
commit
eaa538c814
2 changed files with 9 additions and 2 deletions
4
linkcheck/cache/results.py
vendored
4
linkcheck/cache/results.py
vendored
|
|
@@ -59,6 +59,10 @@ class ResultCache(object):
|
||||||
"""Non-thread-safe function for fast containment checks."""
|
"""Non-thread-safe function for fast containment checks."""
|
||||||
return key in self.cache
|
return key in self.cache
|
||||||
|
|
||||||
|
def has_non_empty_result(self, key):
|
||||||
|
"""Non-thread-safe function for fast containment checks."""
|
||||||
|
return self.cache.get(key)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
"""Get number of cached elements. This is not thread-safe and is
|
"""Get number of cached elements. This is not thread-safe and is
|
||||||
likely to change before the returned value is used."""
|
likely to change before the returned value is used."""
|
||||||
|
|
|
||||||
7
linkcheck/cache/urlqueue.py
vendored
7
linkcheck/cache/urlqueue.py
vendored
|
|
@@ -120,7 +120,9 @@ class UrlQueue (object):
|
||||||
log.debug(LOG_CACHE, "queueing %s", url_data.url)
|
log.debug(LOG_CACHE, "queueing %s", url_data.url)
|
||||||
key = url_data.cache_url
|
key = url_data.cache_url
|
||||||
cache = url_data.aggregate.result_cache
|
cache = url_data.aggregate.result_cache
|
||||||
if url_data.has_result or cache.has_result(key):
|
if cache.has_result(key):
|
||||||
|
return
|
||||||
|
if url_data.has_result:
|
||||||
self.queue.appendleft(url_data)
|
self.queue.appendleft(url_data)
|
||||||
else:
|
else:
|
||||||
assert key is not None, "no result for None key: %s" % url_data
|
assert key is not None, "no result for None key: %s" % url_data
|
||||||
|
|
@@ -131,6 +133,7 @@ class UrlQueue (object):
|
||||||
self.cleanup()
|
self.cleanup()
|
||||||
self.queue.append(url_data)
|
self.queue.append(url_data)
|
||||||
self.unfinished_tasks += 1
|
self.unfinished_tasks += 1
|
||||||
|
cache.add_result(key, None) # add none value to cache to prevent checking this url multiple times
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
"""Move cached elements to top."""
|
"""Move cached elements to top."""
|
||||||
|
|
@@ -139,7 +142,7 @@ class UrlQueue (object):
|
||||||
for i, url_data in enumerate(self.queue):
|
for i, url_data in enumerate(self.queue):
|
||||||
key = url_data.cache_url
|
key = url_data.cache_url
|
||||||
cache = url_data.aggregate.result_cache
|
cache = url_data.aggregate.result_cache
|
||||||
if cache.has_result(key):
|
if cache.has_non_empty_result(key):
|
||||||
cached.append(i)
|
cached.append(i)
|
||||||
for pos in cached:
|
for pos in cached:
|
||||||
self._move_to_top(pos)
|
self._move_to_top(pos)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue