2006-05-24 22:16:36 +00:00
|
|
|
# -*- coding: iso-8859-1 -*-
|
2010-10-25 16:10:32 +00:00
|
|
|
# Copyright (C) 2006-2010 Bastian Kleineidam
|
2006-05-24 22:16:36 +00:00
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
2009-07-24 21:58:20 +00:00
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2007-11-29 07:50:22 +00:00
|
|
|
"""
|
|
|
|
|
URL checking functions.
|
|
|
|
|
"""
|
2008-05-09 06:16:03 +00:00
|
|
|
from . import task
|
|
|
|
|
from ..cache import urlqueue
|
2006-05-24 22:16:36 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_url (urlqueue, logger):
    """Check URLs without threading.

    Drain the given queue in the calling thread: every queued URL data
    object is checked (unless it already carries a result), handed to the
    logger, and finally marked as done on the queue.

    Note that the ``urlqueue`` parameter shadows any module-level name
    ``urlqueue`` inside this function; here it always means the queue
    instance passed in by the caller.

    @param urlqueue: queue object providing ``empty()``, ``get()`` and
        ``task_done(url_data)``
    @param logger: object providing ``log_url(url_data)``
    """
    while not urlqueue.empty():
        url_data = urlqueue.get()
        try:
            # Entries that already have a result are not checked again,
            # but they are still logged.
            if not url_data.has_result:
                url_data.check()
            logger.log_url(url_data)
        finally:
            # Always balance the get() above, even when checking or
            # logging raised an exception.
            urlqueue.task_done(url_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Checker (task.CheckedTask):
    """URL check thread.

    Repeatedly takes URL data objects from a shared queue, checks them and
    passes them to a logger until the task reports that it was stopped.
    """

    def __init__ (self, urlqueue, logger):
        """Store URL queue and logger.

        @param urlqueue: queue object providing ``get(timeout=...)`` and
            ``task_done(url_data)``
        @param logger: object providing ``log_url(url_data)``
        """
        super(Checker, self).__init__()
        self.urlqueue = urlqueue
        self.logger = logger
        # Remember the initial name so it can be restored after the name
        # was temporarily changed while a URL is being checked.
        self.origname = self.getName()

    def run_checked (self):
        """Check URLs in the queue until the task is stopped."""
        while True:
            self.check_url()
            if self.stopped():
                break

    def check_url (self):
        """Try to get URL data from queue and check it.

        Returns without doing anything when no entry became available
        within the timeout or when the queue handed out None.
        """
        try:
            # Short timeout so that run_checked() gets to test stopped()
            # regularly instead of blocking on an empty queue.
            url_data = self.urlqueue.get(timeout=0.1)
        except urlqueue.Empty:
            # "urlqueue" is the module imported at the top of the file,
            # not the queue instance stored in self.urlqueue. Only the
            # get() call is guarded, so an Empty raised while checking or
            # logging is not silently discarded.
            return
        if url_data is None:
            return
        try:
            self.check_url_data(url_data)
        finally:
            # Always balance the successful get() above.
            self.urlqueue.task_done(url_data)
        # Restore the original name once the URL was processed.
        self.setName(self.origname)

    def check_url_data (self, url_data):
        """Check one URL data instance.

        Names the task after the URL while it is processed, checks the URL
        unless it already has a result, and logs it.
        """
        if url_data.url is None:
            url = ""
        else:
            # Replace non-ASCII characters so the name is plain ASCII.
            url = url_data.url.encode("ascii", "replace")
            if not isinstance(url, str):
                # On Python 3 encode() returns bytes; convert back to text
                # so the name does not become "CheckThread-b'...'".
                url = url.decode("ascii")
        self.setName("CheckThread-%s" % url)
        if not url_data.has_result:
            url_data.check()
        self.logger.log_url(url_data)
|