linkchecker/linkcheck/director/__init__.py

130 lines
4.1 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2006-2009 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
Management of checking a queue of links with several threads.
"""
import time
import os
import thread
from .. import log, LOG_CHECK
from ..cache import urlqueue, robots_txt, cookie, connection
from . import aggregator, console
def check_urls (aggregate):
    """Run a complete check of all queued URLs.

    Log output is started, worker threads are launched when the URL queue
    is not empty, and the call then waits in check_url(). Afterwards the
    aggregate is finished and log output is ended.

    A KeyboardInterrupt is handed over to interrupt(). A thread.error is
    reported as a warning and any other exception is reported through
    console.internal_error(); both cases end in abort().

    @param aggregate: the aggregate whose logger, urlqueue and threads
        are driven by this function
    @return: None
    """
    try:
        aggregate.logger.start_log_output()
        queue_is_empty = aggregate.urlqueue.empty()
        if not queue_is_empty:
            aggregate.start_threads()
        check_url(aggregate)
        aggregate.finish()
        aggregate.logger.end_log_output()
    except KeyboardInterrupt:
        interrupt(aggregate)
    except thread.error:
        message = _("Could not start a new thread. Check that the current user"
                    " is allowed to start new threads.")
        log.warn(LOG_CHECK, message)
        abort(aggregate)
    except Exception:
        # Anything unexpected: report it, then shut down cleanly.
        console.internal_error()
        abort(aggregate)
def check_url (aggregate):
    """Wait until the URL queue has been joined or no thread is left.

    The queue is joined with a one-second timeout. After each timeout the
    function sleeps one second, lets the aggregate remove its stopped
    threads and then returns if aggregate.threads is empty.

    @param aggregate: the aggregate owning the URL queue and the threads
    @return: None
    @raise KeyboardInterrupt: when aggregate.wanted_stop is set after a
        timeout while threads are still present
    """
    while True:
        try:
            aggregate.urlqueue.join(timeout=1)
        except urlqueue.Timeout:
            # urlqueue.join() cannot be interrupted, hence the timeout
            # above combined with a one-second sleep here.
            time.sleep(1)
            aggregate.remove_stopped_threads()
            if not aggregate.threads:
                # No thread remains, so there is nothing to wait for.
                return
            if aggregate.wanted_stop:
                # Some other thread asked us to stop.
                raise KeyboardInterrupt
        else:
            # The join completed without a timeout.
            return
def interrupt (aggregate):
    """Handle a keyboard interrupt by warning the user and shutting down.

    Two warnings are logged, the still active threads are printed and
    abort() is called. A KeyboardInterrupt raised while doing so is
    swallowed here and the whole sequence is started again.

    @param aggregate: the aggregate to shut down
    @return: None
    """
    while True:
        try:
            waiting_note = _(
                "keyboard interrupt; waiting for active threads to finish")
            log.warn(LOG_CHECK, waiting_note)
            exit_note = _(
                "another keyboard interrupt will exit immediately")
            log.warn(LOG_CHECK, exit_note)
            print_active_threads(aggregate)
            abort(aggregate)
            return
        except KeyboardInterrupt:
            # Ignore the interrupt and retry the shutdown sequence.
            continue
def print_active_threads (aggregate):
    """Log the names of the aggregate's threads that start with "Check-".

    Nothing is logged when aggregate.threads is empty. Otherwise a header
    line is logged, followed by one line per matching thread holding the
    thread name with the "Check-" prefix removed (presumably the URL being
    checked, judging by the header text).

    @param aggregate: the aggregate whose threads are inspected
    @return: None
    """
    if aggregate.threads:
        log.info(LOG_CHECK, _("These URLs are still active:"))
        prefix = "Check-"
        for worker in aggregate.threads:
            label = worker.getName()
            if label.startswith(prefix):
                # Strip the prefix and log only the remainder.
                log.info(LOG_CHECK, label[len(prefix):])
def abort (aggregate):
    """Abort and finish the aggregate, then end log output.

    If a KeyboardInterrupt arrives during these steps, a warning is
    logged and abort_now() is called to force the shutdown.

    @param aggregate: the aggregate to shut down
    @return: None
    """
    while True:
        try:
            aggregate.abort()
            aggregate.finish()
            aggregate.logger.end_log_output()
            return
        except KeyboardInterrupt:
            forced_note = _("keyboard interrupt; force shutdown")
            log.warn(LOG_CHECK, forced_note)
            abort_now()
def abort_now ():
    """Terminate the current process at once, skipping any cleanup.

    The mechanism is chosen from os.name: os.abort() on 'nt', a SIGKILL
    sent to the own process id on 'posix', and os._exit(3) everywhere else.
    """
    platform_name = os.name
    if platform_name == 'nt':
        # NT has os.abort().
        os.abort()
    elif platform_name == 'posix':
        # Unix systems can send SIGKILL to themselves.
        import signal
        own_pid = os.getpid()
        os.kill(own_pid, signal.SIGKILL)
    else:
        # On all other systems os._exit() is the best shot.
        os._exit(3)
def get_aggregate (config):
    """Build an aggregator.Aggregate for the given configuration.

    A new URL queue, connection pool, cookie jar and robots.txt cache are
    created and passed to the aggregate together with the configuration.
    The connection pool receives config["wait"] as its wait argument.

    @param config: configuration mapping; must contain the key "wait"
    @return: the newly created aggregator.Aggregate instance
    """
    queue = urlqueue.UrlQueue()
    pool = connection.ConnectionPool(wait=config["wait"])
    jar = cookie.CookieJar()
    robots = robots_txt.RobotsTxt()
    return aggregator.Aggregate(config, queue, pool, jar, robots)