Do not sort URL queue anymore.

This commit is contained in:
Bastian Kleineidam 2012-09-02 22:32:14 +02:00
parent 7a6436f08f
commit d8fce1ceeb

View file

@@ -56,7 +56,6 @@ class UrlQueue (object):
self.in_progress = {}
self.checked = LFUCache(size=100000)
self.shutdown = False
self.unsorted = 0
def qsize (self):
"""Return the approximate size of the queue (not reliable!)."""
@@ -146,10 +145,6 @@ class UrlQueue (object):
self.queue.appendleft(url_data)
else:
self.queue.append(url_data)
self.unsorted += 1
if self.unsorted > 2000:
self._sort()
self.unsorted = 0
self.unfinished_tasks += 1
def task_done (self, url_data):
@@ -199,24 +194,6 @@ class UrlQueue (object):
log.debug(LOG_CACHE, "Caching alias %r", key)
self.checked[key] = data
def _sort (self):
    """Rebuild the URL queue with already-resolved entries in front.

    Every entry is drained from ``self.queue`` in order. Entries that
    already carry a result, or whose cache key is present in
    ``self.checked`` (in which case the cached data is copied into the
    entry first), are pushed onto the left end of the new queue; since
    each one is pushed in front of the previous, they end up in reverse
    order of encounter. All remaining entries are appended on the right
    and keep their relative order. ``self.queue`` is rebound to the new
    deque at the end.
    """
    reordered = collections.deque()
    while self.queue:
        entry = self.queue.popleft()
        cache_key = entry.cache_url_key
        if not entry.has_result:
            if cache_key not in self.checked:
                # Nothing known about this URL yet: it stays behind the
                # resolved entries.
                reordered.append(entry)
                continue
            # Result is available in the cache; copy it over without
            # considering whether the URL is currently in progress.
            entry.copy_from_cache(self.checked[cache_key])
        # Entry has a result (own or freshly copied): move to the front.
        reordered.appendleft(entry)
    self.queue = reordered
def join (self, timeout=None):
"""Blocks until all items in the Queue have been gotten and processed.