mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-08 16:41:00 +00:00
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2239 e7d03fd6-7b0d-0410-9947-9c21f3af8025
243 lines
7.2 KiB
Python
243 lines
7.2 KiB
Python
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2000-2005 Bastian Kleineidam
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
"""
|
|
Url consumer class.
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
try:
|
|
import threading
|
|
except ImportError:
|
|
import dummy_threading as threading
|
|
|
|
import linkcheck.threader
|
|
import linkcheck.log
|
|
import linkcheck.strformat
|
|
from urlbase import stderr
|
|
|
|
|
|
def print_tocheck (tocheck):
    """Write the number of queued URLs to stderr, without a trailing newline."""
    print >> stderr, _n("%5d URL queued,", "%5d URLs queued,", tocheck) % tocheck,
|
|
|
|
|
|
def print_links (links):
    """Write the number of checked URLs to stderr, without a trailing newline."""
    print >> stderr, _n("%4d URL checked,", "%4d URLs checked,", links) % links,
|
|
|
|
|
|
def print_active (active):
    """Write the number of active checker threads to stderr, without a trailing newline."""
    print >> stderr, _n("%2d active thread,", "%2d active threads,", active) % active,
|
|
|
|
|
|
def print_duration (duration):
    """Write the formatted runtime to stderr, without a trailing newline."""
    print >> stderr, _("runtime %s") % linkcheck.strformat.strduration(duration),
|
|
|
|
|
|
class Consumer (object):
    """
    Consume urls from the url queue in a threaded manner.
    """

    def __init__ (self, config, cache):
        """
        Initialize consumer data and threads.

        @param config: configuration mapping with at least the keys
            'threads', 'logger', 'fileoutput', 'verbose' and 'warnings'
        @param cache: url cache holding incoming/in-progress/checked urls
        """
        self.config = config
        self.cache = cache
        self.logger = config['logger']
        self.fileoutput = config['fileoutput']
        # number of urls logged so far
        self.linknumber = 0
        # True as soon as any checked url was invalid
        self.errors = False
        # True as soon as any checked url produced a warning
        self.warnings = False
        # one lock protecting all shared consumer data
        self.lock = threading.Lock()
        self.threader = linkcheck.threader.Threader()
        self._set_threads(config['threads'])
        self.logger_start_output()

    def _set_threads (self, num):
        """
        Set number of checker threads to start.
        """
        linkcheck.log.debug(linkcheck.LOG_CHECK,
            "set threading with %d threads", num)
        self.threader.threads_max = num
        # tune the interpreter check interval: switch threads more
        # often when checking multithreaded
        if num > 0:
            interval = 50
        else:
            interval = 100
        sys.setcheckinterval(interval)

    def append_url (self, url_data):
        """
        Append url to incoming check list.
        """
        if self.cache.incoming_add(url_data):
            return
        # url was not queued (already known) -> it can be logged now
        self.logger_new_url(url_data)

    def check_url (self):
        """
        Start a new thread checking the next queued url.
        """
        url_data = self.cache.incoming_get_url()
        if url_data is None:
            # active connections are still downloading/parsing,
            # so wait a little for new urls to appear
            time.sleep(0.1)
            return
        if url_data.cached:
            # result came from the cache -> log it right away
            self.logger_new_url(url_data)
            return
        # go check this url; the check calls either self.checked()
        # or self.interrupted() when done
        self.threader.start_thread(url_data.check, ())

    def checked (self, url_data):
        """
        Put checked url in cache and log it.
        """
        # log before putting it in the cache (otherwise we would see
        # a "(cached)" after every url)
        self.logger_new_url(url_data)
        if url_data.cached:
            self.cache.in_progress_remove(url_data)
        else:
            self.cache.checked_add(url_data)

    def interrupted (self, url_data):
        """
        Remove url from active list.
        """
        self.cache.in_progress_remove(url_data)

    def finished (self):
        """
        Return True if checking is finished.
        """
        self.lock.acquire()
        try:
            # finished means: no active threads and an empty queue;
            # keep the short-circuit so the queue length is only
            # inspected once all threads are done
            if not self.threader.finished():
                return False
            return self.cache.incoming_len() <= 0
        finally:
            self.lock.release()

    def no_more_threads (self):
        """
        Return True if no more active threads are running.
        """
        self.lock.acquire()
        try:
            return self.threader.finished()
        finally:
            self.lock.release()

    def abort (self):
        """
        Abort checking and send end-of-output message to logger.
        """
        while not self.no_more_threads():
            num = self.active_threads()
            msg = _n(
               "keyboard interrupt; waiting for %d active thread to finish",
               "keyboard interrupt; waiting for %d active threads to finish",
               num)
            linkcheck.log.warn(linkcheck.LOG_CHECK, msg, num)
            self.lock.acquire()
            try:
                self.threader.finish()
            finally:
                self.lock.release()
            time.sleep(2)
        self.logger_end_output()

    def print_status (self, curtime, start_time):
        """
        Print check status looking at url queues.
        """
        self.lock.acquire()
        try:
            print >> stderr, _("Status:"),
            print_active(self.threader.active_threads())
            print_links(self.linknumber)
            print_tocheck(self.cache.incoming_len())
            print_duration(curtime - start_time)
            # terminate the status line
            print >> stderr
        finally:
            self.lock.release()

    def logger_start_output (self):
        """
        Start output of all configured loggers.
        """
        self.lock.acquire()
        try:
            self.logger.start_output()
            for out in self.fileoutput:
                out.start_output()
        finally:
            self.lock.release()

    def logger_new_url (self, url_data):
        """
        Send new url to all configured loggers.
        """
        self.lock.acquire()
        try:
            self.linknumber += 1
            # do_filter is only referenced by the disabled code below
            do_filter = (self.linknumber % 1000) == 0
            if not url_data.valid:
                self.errors = True
            if url_data.warning and self.config["warnings"]:
                self.warnings = True
            loggable = (self.config["verbose"] or not url_data.valid or
                        (url_data.warning and self.config["warnings"]))
            if loggable:
                self.logger.new_url(url_data)
                for out in self.fileoutput:
                    out.new_url(url_data)
        finally:
            self.lock.release()
        # XXX deadlock!
        #if do_filter:
        #    self.filter_queue(self)

    def logger_end_output (self):
        """
        End output of all configured loggers.
        """
        self.lock.acquire()
        try:
            self.logger.end_output(linknumber=self.linknumber)
            for out in self.fileoutput:
                out.end_output(linknumber=self.linknumber)
        finally:
            self.lock.release()

    def active_threads (self):
        """
        Return number of active threads.
        """
        self.lock.acquire()
        try:
            return self.threader.active_threads()
        finally:
            self.lock.release()