mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Make WSGI script more responsive by using threads.
This commit is contained in:
parent
3d831c1adb
commit
1ef9a022ca
8 changed files with 202 additions and 145 deletions
|
|
@ -4,7 +4,7 @@ include config/linkchecker-completion config/create.sql
|
|||
include config/linkcheckerrc
|
||||
include config/linkchecker.apache2.conf install-rpm.sh
|
||||
include linkchecker.freecode
|
||||
include cgi-bin/lc.cgi cgi-bin/README
|
||||
include cgi-bin/lc.wsgi cgi-bin/README
|
||||
include Makefile
|
||||
include debian/rules
|
||||
include debian/changelog
|
||||
|
|
|
|||
1
Makefile
1
Makefile
|
|
@ -176,6 +176,7 @@ doccheck:
|
|||
linkcheck/updater.py \
|
||||
linkcheck/url.py \
|
||||
linkcheck/winutil.py \
|
||||
cgi-bin/lc.wsgi \
|
||||
linkchecker \
|
||||
linkchecker-gui \
|
||||
*.py
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2000-2009 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
import sys
|
||||
import cgi
|
||||
import linkcheck
|
||||
import linkcheck.lc_cgi
|
||||
|
||||
# log errors to stdout
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
# uncomment the following lines to test your CGI values
|
||||
#cgi.test()
|
||||
#sys.exit(0)
|
||||
linkcheck.lc_cgi.startoutput()
|
||||
linkcheck.lc_cgi.checklink(form=cgi.FieldStorage())
|
||||
|
|
@ -16,30 +16,4 @@
|
|||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
from cStringIO import StringIO
|
||||
import cgi
|
||||
import linkcheck
|
||||
import linkcheck.lc_cgi
|
||||
|
||||
def application(environ, start_response):
|
||||
# the environment variable CONTENT_LENGTH may be empty or missing
|
||||
try:
|
||||
request_body_size = int(environ.get('CONTENT_LENGTH', 0))
|
||||
except (ValueError):
|
||||
request_body_size = 0
|
||||
|
||||
# When the method is POST the query string will be sent
|
||||
# in the HTTP request body which is passed by the WSGI server
|
||||
# in the file like wsgi.input environment variable.
|
||||
request_body = environ['wsgi.input'].read(request_body_size)
|
||||
form = cgi.parse_qs(request_body)
|
||||
|
||||
status = '200 OK'
|
||||
start_response(status, linkcheck.lc_cgi.get_response_headers())
|
||||
output = StringIO()
|
||||
# XXX this is slow since it checks the whole site before showing
|
||||
# any out.
|
||||
# Instead check in a separate thread and yield output as soon
|
||||
# as it is available.
|
||||
linkcheck.lc_cgi.checklink(form=form, out=output, env=environ)
|
||||
return [output.getvalue()]
|
||||
from linkcheck.lc_cgi import application
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from .linkchecker_ui_main import Ui_MainWindow
|
|||
from .properties import set_properties, clear_properties
|
||||
from .statistics import set_statistics, clear_statistics
|
||||
from .debug import LinkCheckerDebug
|
||||
from .logger import GuiLogger, GuiLogHandler, StatusLogger
|
||||
from .logger import SignalLogger, GuiLogHandler, StatusLogger
|
||||
from .help import HelpWindow
|
||||
from .options import LinkCheckerOptions
|
||||
from .checker import CheckerThread
|
||||
|
|
@ -226,7 +226,7 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow):
|
|||
# dictionary holding overwritten values
|
||||
self.config_backup = {}
|
||||
# set standard GUI configuration values
|
||||
self.config.logger_add("gui", GuiLogger)
|
||||
self.config.logger_add("gui", SignalLogger)
|
||||
self.config["logger"] = self.config.logger_new('gui',
|
||||
signal=self.log_url_signal, stats=self.log_stats_signal)
|
||||
self.config["status"] = True
|
||||
|
|
|
|||
|
|
@ -32,12 +32,12 @@ class GuiLogHandler (Handler, object):
|
|||
self.signal.emit(self.format(record))
|
||||
|
||||
|
||||
class GuiLogger (Logger):
|
||||
"""Delegate log URLs to the UI tree widget."""
|
||||
class SignalLogger (Logger):
|
||||
"""Use Qt signals for logged URLs and statistics."""
|
||||
|
||||
def __init__ (self, **args):
|
||||
"""Store signals for URL and statistic data."""
|
||||
super(GuiLogger, self).__init__(**args)
|
||||
super(SignalLogger, self).__init__(**args)
|
||||
self.log_url_signal = args["signal"]
|
||||
self.log_stats_signal = args["stats"]
|
||||
|
||||
|
|
|
|||
|
|
@ -15,19 +15,45 @@
|
|||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Common functions used by the CGI and WSGI scripts.
|
||||
Functions used by the WSGI script.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import cgi
|
||||
import os
|
||||
import threading
|
||||
from StringIO import StringIO
|
||||
import locale
|
||||
import re
|
||||
import time
|
||||
import urlparse
|
||||
import types
|
||||
from . import configuration, strformat, checker, director, i18n
|
||||
from . import add_intern_pattern, get_link_pat, init_i18n
|
||||
from . import url as urlutil
|
||||
from PyQt4 import QtCore
|
||||
from . import configuration, strformat, checker, director, \
|
||||
add_intern_pattern, get_link_pat, init_i18n, url as urlutil
|
||||
from .logger import Logger
|
||||
|
||||
|
||||
def application(environ, start_response):
|
||||
"""WSGI interface: start an URL check."""
|
||||
# the environment variable CONTENT_LENGTH may be empty or missing
|
||||
try:
|
||||
request_body_size = int(environ.get('CONTENT_LENGTH', 0))
|
||||
except ValueError:
|
||||
request_body_size = 0
|
||||
|
||||
# When the method is POST the query string will be sent
|
||||
# in the HTTP request body which is passed by the WSGI server
|
||||
# in the file like wsgi.input environment variable.
|
||||
if request_body_size > 0:
|
||||
request_body = environ['wsgi.input'].read(request_body_size)
|
||||
else:
|
||||
request_body = environ['wsgi.input'].read()
|
||||
form = cgi.parse_qs(request_body)
|
||||
|
||||
status = '200 OK'
|
||||
start_response(status, get_response_headers())
|
||||
for output in checklink(form=form, env=environ):
|
||||
yield output
|
||||
|
||||
|
||||
_logfile = None
|
||||
|
|
@ -46,46 +72,152 @@ class LCFormError (StandardError):
|
|||
|
||||
|
||||
def get_response_headers():
|
||||
"""Get list of response headers in key-value form."""
|
||||
return [("Content-type", "text/html"),
|
||||
("Cache-Control", "no-cache"),
|
||||
("Pragma:", "no-cache")
|
||||
]
|
||||
|
||||
def startoutput (out=None):
|
||||
"""Print leading HTML headers to given output stream."""
|
||||
if out is None:
|
||||
out = i18n.get_encoded_writer()
|
||||
for key, value in get_response_headers():
|
||||
out.write("%s: %s\r\n" % (key, value))
|
||||
out.write("\r\n")
|
||||
|
||||
|
||||
def formvalue (form, key):
|
||||
"""Get value with given key from WSGI form."""
|
||||
field = form.get(key)
|
||||
if field is not None and hasattr(field, 'value'):
|
||||
# it's a CGI FormField
|
||||
field = field.value
|
||||
else:
|
||||
# assume WSGI dictionary lists
|
||||
if isinstance(field, list):
|
||||
field = field[0]
|
||||
return field
|
||||
|
||||
|
||||
def checklink (out=None, form=None, env=os.environ):
|
||||
"""Main cgi function, check the given links and print out the result."""
|
||||
if out is None:
|
||||
out = i18n.get_encoded_writer()
|
||||
class ThreadsafeStringIO (StringIO):
|
||||
"""Thread-safe String I/O class."""
|
||||
def __init__(self):
|
||||
self.buf = []
|
||||
self.lock = threading.Lock()
|
||||
self.closed = False
|
||||
|
||||
def write (self, data):
|
||||
self.lock.acquire()
|
||||
try:
|
||||
if self.closed:
|
||||
raise IOError("Write on closed I/O object")
|
||||
self.buf.append(data)
|
||||
finally:
|
||||
self.lock.release()
|
||||
|
||||
def get_data (self):
|
||||
self.lock.acquire()
|
||||
try:
|
||||
data = "".join(self.buf)
|
||||
self.buf = []
|
||||
return data
|
||||
finally:
|
||||
self.lock.release()
|
||||
|
||||
def close (self):
|
||||
self.lock.acquire()
|
||||
try:
|
||||
self.buf = []
|
||||
self.closed = True
|
||||
finally:
|
||||
self.lock.release()
|
||||
|
||||
|
||||
class SignalLogger (Logger):
|
||||
"""Use Qt signals for logged URLs and statistics."""
|
||||
|
||||
def __init__ (self, **args):
|
||||
"""Store signals for URL and statistic data."""
|
||||
super(SignalLogger, self).__init__(**args)
|
||||
self.log_url_signal = args["signal"]
|
||||
self.log_stats_signal = args["stats"]
|
||||
|
||||
def start_fileoutput (self):
|
||||
"""Override fileoutput handling of base class."""
|
||||
pass
|
||||
|
||||
def close_fileoutput (self):
|
||||
"""Override fileoutput handling of base class."""
|
||||
pass
|
||||
|
||||
def log_url (self, url_data):
|
||||
"""Emit URL data which gets logged in the main window."""
|
||||
self.log_url_signal.emit(url_data)
|
||||
|
||||
def end_output (self):
|
||||
"""Emit statistic data which gets logged in the main window."""
|
||||
self.log_stats_signal.emit(self.stats)
|
||||
|
||||
|
||||
class DelegateLogger (QtCore.QObject):
|
||||
"""Logger using connected signals, delegating output to
|
||||
another logger class."""
|
||||
log_url_signal = QtCore.pyqtSignal(object)
|
||||
log_stats_signal = QtCore.pyqtSignal(object)
|
||||
|
||||
def __init__ (self):
|
||||
"""Connect signals to this instance and init state."""
|
||||
super(DelegateLogger, self).__init__()
|
||||
self.log_url_signal.connect(self.log_url)
|
||||
self.log_stats_signal.connect(self.log_stats)
|
||||
self.logger = None
|
||||
self.finished = False
|
||||
|
||||
def add_logger (self, logger):
|
||||
"""Delegate to given logger."""
|
||||
self.logger = logger
|
||||
|
||||
def log_url (self, url_data):
|
||||
"""Delegate URL logging to internal logger."""
|
||||
self.logger.log_url(url_data)
|
||||
|
||||
def log_stats (self, statistics):
|
||||
"""Delegate statistic logging to internal logger."""
|
||||
self.logger.stats = statistics
|
||||
self.logger.end_output()
|
||||
self.finished = True
|
||||
|
||||
|
||||
def checklink (form=None, env=os.environ):
|
||||
"""Validates the CGI form and checks the given links."""
|
||||
if form is None:
|
||||
form = {}
|
||||
try:
|
||||
checkform(form)
|
||||
except LCFormError, why:
|
||||
except LCFormError, errmsg:
|
||||
logit(form, env)
|
||||
print_error(out, why)
|
||||
yield print_error(errmsg)
|
||||
return
|
||||
delegate_logger = DelegateLogger()
|
||||
config = get_configuration(form, delegate_logger)
|
||||
out = ThreadsafeStringIO()
|
||||
html_logger = config.logger_new('html', fd=out)
|
||||
delegate_logger.add_logger(html_logger)
|
||||
url = strformat.stripurl(formvalue(form, "url"))
|
||||
aggregate = director.get_aggregate(config)
|
||||
url_data = checker.get_url_from(url, 0, aggregate)
|
||||
try:
|
||||
add_intern_pattern(url_data, config)
|
||||
except UnicodeError, errmsg:
|
||||
logit({}, env)
|
||||
yield print_error(_("URL has unparsable domain name: %s") % errmsg)
|
||||
return
|
||||
aggregate.urlqueue.put(url_data)
|
||||
html_logger.start_output()
|
||||
# check in background
|
||||
director.check_urls(aggregate)
|
||||
while not delegate_logger.finished:
|
||||
yield out.get_data()
|
||||
time.sleep(2)
|
||||
yield out.get_data()
|
||||
out.close()
|
||||
|
||||
|
||||
def get_configuration(form, logger):
|
||||
"""Initialize a CGI configuration."""
|
||||
config = configuration.Configuration()
|
||||
config["recursionlevel"] = int(formvalue(form, "level"))
|
||||
config["logger"] = config.logger_new('html', fd=out)
|
||||
config.logger_add("signal", SignalLogger)
|
||||
config["logger"] = config.logger_new('signal',
|
||||
signal=logger.log_url_signal, stats=logger.log_stats_signal)
|
||||
config["threads"] = 0
|
||||
if "anchors" in form:
|
||||
config["anchors"] = True
|
||||
|
|
@ -94,20 +226,7 @@ def checklink (out=None, form=None, env=os.environ):
|
|||
# avoid checking of local files or other nasty stuff
|
||||
pat = "!^%s$" % urlutil.safe_url_pattern
|
||||
config["externlinks"].append(get_link_pat(pat, strict=True))
|
||||
# start checking
|
||||
aggregate = director.get_aggregate(config)
|
||||
get_url_from = checker.get_url_from
|
||||
url = strformat.stripurl(formvalue(form, "url"))
|
||||
url_data = get_url_from(url, 0, aggregate)
|
||||
try:
|
||||
add_intern_pattern(url_data, config)
|
||||
except UnicodeError:
|
||||
logit({}, env)
|
||||
print_error(out, _("URL has unparsable domain name: %s") % \
|
||||
sys.exc_info()[1])
|
||||
return
|
||||
aggregate.urlqueue.put(url_data)
|
||||
director.check_urls(aggregate)
|
||||
return config
|
||||
|
||||
|
||||
def get_host_name (form):
|
||||
|
|
@ -165,7 +284,7 @@ def logit (form, env):
|
|||
_logfile.write(str(formvalue(form, key))+"\n")
|
||||
|
||||
|
||||
def print_error (out, why):
|
||||
def print_error (why):
|
||||
"""Print standard error page."""
|
||||
s = _("""<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
|
|
@ -180,4 +299,5 @@ Errors are logged.
|
|||
</blockquote>
|
||||
</body>
|
||||
</html>""") % why
|
||||
out.write(s.encode('iso-8859-1', 'ignore'))
|
||||
return s.encode('iso-8859-1', 'ignore')
|
||||
|
||||
|
|
|
|||
|
|
@ -18,59 +18,52 @@
|
|||
Test cgi form routines.
|
||||
"""
|
||||
import unittest
|
||||
import linkcheck.lc_cgi
|
||||
import wsgiref
|
||||
import urllib
|
||||
from StringIO import StringIO
|
||||
from wsgiref.util import setup_testing_defaults
|
||||
from linkcheck.lc_cgi import checkform, checklink, LCFormError, application
|
||||
|
||||
|
||||
class Store (object):
|
||||
"""
|
||||
Value storing class implementing FieldStorage interface.
|
||||
"""
|
||||
|
||||
def __init__ (self, value):
|
||||
"""
|
||||
Store given value.
|
||||
"""
|
||||
self.value = value
|
||||
|
||||
|
||||
class TestCgi (unittest.TestCase):
|
||||
"""
|
||||
Test cgi routines.
|
||||
"""
|
||||
class TestWsgi (unittest.TestCase):
|
||||
"""Test wsgi application."""
|
||||
|
||||
def test_form_valid_url (self):
|
||||
# Check url validity.
|
||||
form = {"url": Store("http://www.heise.de/"),
|
||||
"level": Store("1"),
|
||||
}
|
||||
linkcheck.lc_cgi.checkform(form)
|
||||
form = dict(url="http://www.example.com/", level="1")
|
||||
checkform(form)
|
||||
|
||||
def test_form_empty_url (self):
|
||||
# Check with empty url.
|
||||
form = {"url": Store(""),
|
||||
"level": Store("0"),
|
||||
}
|
||||
self.assertRaises(linkcheck.lc_cgi.LCFormError,
|
||||
linkcheck.lc_cgi.checkform, form)
|
||||
form = dict(url="", level="0")
|
||||
self.assertRaises(LCFormError, checkform, form)
|
||||
|
||||
def test_form_default_url (self):
|
||||
# Check with default url.
|
||||
form = {"url": Store("http://"),
|
||||
"level": Store("0"),
|
||||
}
|
||||
self.assertRaises(linkcheck.lc_cgi.LCFormError,
|
||||
linkcheck.lc_cgi.checkform, form)
|
||||
form = dict(url="http://", level="0")
|
||||
self.assertRaises(LCFormError, checkform, form)
|
||||
|
||||
def test_form_invalid_url (self):
|
||||
# Check url (in)validity.
|
||||
form = {"url": Store("http://www.foo bar/"),
|
||||
"level": Store("0"),
|
||||
}
|
||||
self.assertRaises(linkcheck.lc_cgi.LCFormError,
|
||||
linkcheck.lc_cgi.checkform, form)
|
||||
form = dict(url="http://www.foo bar/", level="0")
|
||||
self.assertRaises(LCFormError, checkform, form)
|
||||
|
||||
def test_checklink (self):
|
||||
form = {"url": Store("http://www.heise.de/"),
|
||||
"level": Store("0"),
|
||||
}
|
||||
linkcheck.lc_cgi.checklink(form=form)
|
||||
form = dict(url="http://www.example.com/", level="0")
|
||||
checklink(form)
|
||||
|
||||
def test_application (self):
|
||||
form = dict(url="http://www.example.com/", level="0")
|
||||
formdata = urllib.urlencode(form)
|
||||
environ = {'wsgi.input': StringIO(formdata)}
|
||||
setup_testing_defaults(environ)
|
||||
test_response = ""
|
||||
test_headers = [None]
|
||||
test_status = [None]
|
||||
def start_response(status, headers):
|
||||
test_status[0] = status
|
||||
test_headers[0] = headers
|
||||
for chunk in application(environ, start_response):
|
||||
test_response += chunk
|
||||
self.assertEqual(test_status[0], '200 OK')
|
||||
self.assertTrue("Generated by LinkChecker" in test_response)
|
||||
|
|
|
|||
Loading…
Reference in a new issue