mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-04 15:10:23 +00:00
Do not sort URL queue anymore.
This commit is contained in:
parent
7a6436f08f
commit
d8fce1ceeb
1 changed file with 0 additions and 23 deletions
23
linkcheck/cache/urlqueue.py
vendored
23
linkcheck/cache/urlqueue.py
vendored
|
|
@@ -56,7 +56,6 @@ class UrlQueue (object):
|
|||
self.in_progress = {}
|
||||
self.checked = LFUCache(size=100000)
|
||||
self.shutdown = False
|
||||
self.unsorted = 0
|
||||
|
||||
def qsize (self):
|
||||
"""Return the approximate size of the queue (not reliable!)."""
|
||||
|
|
@@ -146,10 +145,6 @@ class UrlQueue (object):
|
|||
self.queue.appendleft(url_data)
|
||||
else:
|
||||
self.queue.append(url_data)
|
||||
self.unsorted += 1
|
||||
if self.unsorted > 2000:
|
||||
self._sort()
|
||||
self.unsorted = 0
|
||||
self.unfinished_tasks += 1
|
||||
|
||||
def task_done (self, url_data):
|
||||
|
|
@@ -199,24 +194,6 @@ class UrlQueue (object):
|
|||
log.debug(LOG_CACHE, "Caching alias %r", key)
|
||||
self.checked[key] = data
|
||||
|
||||
def _sort (self):
|
||||
"""Sort URL queue by putting all cached URLs at the beginning."""
|
||||
newqueue = collections.deque()
|
||||
while self.queue:
|
||||
url_data = self.queue.popleft()
|
||||
key = url_data.cache_url_key
|
||||
if url_data.has_result:
|
||||
# Already checked and copied from cache.
|
||||
newqueue.appendleft(url_data)
|
||||
elif key in self.checked:
|
||||
# Already checked; copy result. And even ignore
|
||||
# the case where url happens to be in_progress.
|
||||
url_data.copy_from_cache(self.checked[key])
|
||||
newqueue.appendleft(url_data)
|
||||
else:
|
||||
newqueue.append(url_data)
|
||||
self.queue = newqueue
|
||||
|
||||
def join (self, timeout=None):
|
||||
"""Blocks until all items in the Queue have been gotten and processed.
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue