diff --git a/MANIFEST.in b/MANIFEST.in index 4612ab99..779b1865 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,7 +4,7 @@ include config/linkchecker-completion config/create.sql include config/linkcheckerrc include config/linkchecker.apache2.conf install-rpm.sh include linkchecker.freecode -include cgi-bin/lc.cgi cgi-bin/README +include cgi-bin/lc.wsgi cgi-bin/README include Makefile include debian/rules include debian/changelog diff --git a/Makefile b/Makefile index 1fc91173..cce5ba67 100644 --- a/Makefile +++ b/Makefile @@ -176,6 +176,7 @@ doccheck: linkcheck/updater.py \ linkcheck/url.py \ linkcheck/winutil.py \ + cgi-bin/lc.wsgi \ linkchecker \ linkchecker-gui \ *.py diff --git a/cgi-bin/lc.cgi b/cgi-bin/lc.cgi deleted file mode 100755 index 4615937a..00000000 --- a/cgi-bin/lc.cgi +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/python -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2009 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -import sys -import cgi -import linkcheck -import linkcheck.lc_cgi - -# log errors to stdout -sys.stderr = sys.stdout - -# uncomment the following lines to test your CGI values -#cgi.test() -#sys.exit(0) -linkcheck.lc_cgi.startoutput() -linkcheck.lc_cgi.checklink(form=cgi.FieldStorage()) diff --git a/cgi-bin/lc.wsgi b/cgi-bin/lc.wsgi index d9f4b84a..109cd0ec 100644 --- a/cgi-bin/lc.wsgi +++ b/cgi-bin/lc.wsgi @@ -16,30 +16,4 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -from cStringIO import StringIO -import cgi -import linkcheck -import linkcheck.lc_cgi - -def application(environ, start_response): - # the environment variable CONTENT_LENGTH may be empty or missing - try: - request_body_size = int(environ.get('CONTENT_LENGTH', 0)) - except (ValueError): - request_body_size = 0 - - # When the method is POST the query string will be sent - # in the HTTP request body which is passed by the WSGI server - # in the file like wsgi.input environment variable. - request_body = environ['wsgi.input'].read(request_body_size) - form = cgi.parse_qs(request_body) - - status = '200 OK' - start_response(status, linkcheck.lc_cgi.get_response_headers()) - output = StringIO() - # XXX this is slow since it checks the whole site before showing - # any out. - # Instead check in a separate thread and yield output as soon - # as it is available. - linkcheck.lc_cgi.checklink(form=form, out=output, env=environ) - return [output.getvalue()] +from linkcheck.lc_cgi import application diff --git a/linkcheck/gui/__init__.py b/linkcheck/gui/__init__.py index c5c731fd..7eacd3d6 100644 --- a/linkcheck/gui/__init__.py +++ b/linkcheck/gui/__init__.py @@ -24,7 +24,7 @@ from .linkchecker_ui_main import Ui_MainWindow from .properties import set_properties, clear_properties from .statistics import set_statistics, clear_statistics from .debug import LinkCheckerDebug -from .logger import GuiLogger, GuiLogHandler, StatusLogger +from .logger import SignalLogger, GuiLogHandler, StatusLogger from .help import HelpWindow from .options import LinkCheckerOptions from .checker import CheckerThread @@ -226,7 +226,7 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow): # dictionary holding overwritten values self.config_backup = {} # set standard GUI configuration values - self.config.logger_add("gui", GuiLogger) + self.config.logger_add("gui", SignalLogger) self.config["logger"] = self.config.logger_new('gui', signal=self.log_url_signal, stats=self.log_stats_signal) self.config["status"] = True diff --git a/linkcheck/gui/logger.py b/linkcheck/gui/logger.py index 1a54018f..3f818a65 100644 --- a/linkcheck/gui/logger.py +++ b/linkcheck/gui/logger.py @@ -32,12 +32,12 @@ class GuiLogHandler (Handler, object): self.signal.emit(self.format(record)) -class GuiLogger (Logger): - """Delegate log URLs to the UI tree widget.""" +class SignalLogger (Logger): + """Use Qt signals for logged URLs and statistics.""" def __init__ (self, **args): """Store signals for URL and statistic data.""" - super(GuiLogger, self).__init__(**args) + super(SignalLogger, self).__init__(**args) self.log_url_signal = args["signal"] self.log_stats_signal = args["stats"] diff --git a/linkcheck/lc_cgi.py b/linkcheck/lc_cgi.py index 049bf463..b9e890f3 100644 --- a/linkcheck/lc_cgi.py +++ b/linkcheck/lc_cgi.py @@ -15,19 +15,45 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ -Common functions used by the CGI and WSGI scripts. +Functions used by the WSGI script. """ -import sys +import cgi import os +import threading +from StringIO import StringIO import locale import re import time import urlparse import types -from . import configuration, strformat, checker, director, i18n -from . import add_intern_pattern, get_link_pat, init_i18n -from . import url as urlutil +from PyQt4 import QtCore +from . import configuration, strformat, checker, director, \ + add_intern_pattern, get_link_pat, init_i18n, url as urlutil +from .logger import Logger + + +def application(environ, start_response): + """WSGI interface: start an URL check.""" + # the environment variable CONTENT_LENGTH may be empty or missing + try: + request_body_size = int(environ.get('CONTENT_LENGTH', 0)) + except ValueError: + request_body_size = 0 + + # When the method is POST the query string will be sent + # in the HTTP request body which is passed by the WSGI server + # in the file like wsgi.input environment variable. + if request_body_size > 0: + request_body = environ['wsgi.input'].read(request_body_size) + else: + request_body = environ['wsgi.input'].read() + form = cgi.parse_qs(request_body) + + status = '200 OK' + start_response(status, get_response_headers()) + for output in checklink(form=form, env=environ): + yield output _logfile = None @@ -46,46 +72,152 @@ class LCFormError (StandardError): def get_response_headers(): + """Get list of response headers in key-value form.""" return [("Content-type", "text/html"), ("Cache-Control", "no-cache"), ("Pragma:", "no-cache") ] -def startoutput (out=None): - """Print leading HTML headers to given output stream.""" - if out is None: - out = i18n.get_encoded_writer() - for key, value in get_response_headers(): - out.write("%s: %s\r\n" % (key, value)) - out.write("\r\n") - def formvalue (form, key): + """Get value with given key from WSGI form.""" field = form.get(key) - if field is not None and hasattr(field, 'value'): - # it's a CGI FormField - field = field.value - else: - # assume WSGI dictionary lists + if isinstance(field, list): field = field[0] return field -def checklink (out=None, form=None, env=os.environ): - """Main cgi function, check the given links and print out the result.""" - if out is None: - out = i18n.get_encoded_writer() +class ThreadsafeStringIO (StringIO): + """Thread-safe String I/O class.""" + def __init__(self): + self.buf = [] + self.lock = threading.Lock() + self.closed = False + + def write (self, data): + self.lock.acquire() + try: + if self.closed: + raise IOError("Write on closed I/O object") + self.buf.append(data) + finally: + self.lock.release() + + def get_data (self): + self.lock.acquire() + try: + data = "".join(self.buf) + self.buf = [] + return data + finally: + self.lock.release() + + def close (self): + self.lock.acquire() + try: + self.buf = [] + self.closed = True + finally: + self.lock.release() + + +class SignalLogger (Logger): + """Use Qt signals for logged URLs and statistics.""" + + def __init__ (self, **args): + """Store signals for URL and statistic data.""" + super(SignalLogger, self).__init__(**args) + self.log_url_signal = args["signal"] + self.log_stats_signal = args["stats"] + + def start_fileoutput (self): + """Override fileoutput handling of base class.""" + pass + + def close_fileoutput (self): + """Override fileoutput handling of base class.""" + pass + + def log_url (self, url_data): + """Emit URL data which gets logged in the main window.""" + self.log_url_signal.emit(url_data) + + def end_output (self): + """Emit statistic data which gets logged in the main window.""" + self.log_stats_signal.emit(self.stats) + + +class DelegateLogger (QtCore.QObject): + """Logger using connected signals, delegating output to + another logger class.""" + log_url_signal = QtCore.pyqtSignal(object) + log_stats_signal = QtCore.pyqtSignal(object) + + def __init__ (self): + """Connect signals to this instance and init state.""" + super(DelegateLogger, self).__init__() + self.log_url_signal.connect(self.log_url) + self.log_stats_signal.connect(self.log_stats) + self.logger = None + self.finished = False + + def add_logger (self, logger): + """Delegate to given logger.""" + self.logger = logger + + def log_url (self, url_data): + """Delegate URL logging to internal logger.""" + self.logger.log_url(url_data) + + def log_stats (self, statistics): + """Delegate statistic logging to internal logger.""" + self.logger.stats = statistics + self.logger.end_output() + self.finished = True + + +def checklink (form=None, env=os.environ): + """Validates the CGI form and checks the given links.""" if form is None: form = {} try: checkform(form) - except LCFormError, why: + except LCFormError, errmsg: logit(form, env) - print_error(out, why) + yield print_error(errmsg) return + delegate_logger = DelegateLogger() + config = get_configuration(form, delegate_logger) + out = ThreadsafeStringIO() + html_logger = config.logger_new('html', fd=out) + delegate_logger.add_logger(html_logger) + url = strformat.stripurl(formvalue(form, "url")) + aggregate = director.get_aggregate(config) + url_data = checker.get_url_from(url, 0, aggregate) + try: + add_intern_pattern(url_data, config) + except UnicodeError, errmsg: + logit({}, env) + yield print_error(_("URL has unparsable domain name: %s") % errmsg) + return + aggregate.urlqueue.put(url_data) + html_logger.start_output() + # check in background + director.check_urls(aggregate) + while not delegate_logger.finished: + yield out.get_data() + time.sleep(2) + yield out.get_data() + out.close() + + +def get_configuration(form, logger): + """Initialize a CGI configuration.""" config = configuration.Configuration() config["recursionlevel"] = int(formvalue(form, "level")) - config["logger"] = config.logger_new('html', fd=out) + config.logger_add("signal", SignalLogger) + config["logger"] = config.logger_new('signal', + signal=logger.log_url_signal, stats=logger.log_stats_signal) config["threads"] = 0 if "anchors" in form: config["anchors"] = True @@ -94,20 +226,7 @@ def checklink (out=None, form=None, env=os.environ): # avoid checking of local files or other nasty stuff pat = "!^%s$" % urlutil.safe_url_pattern config["externlinks"].append(get_link_pat(pat, strict=True)) - # start checking - aggregate = director.get_aggregate(config) - get_url_from = checker.get_url_from - url = strformat.stripurl(formvalue(form, "url")) - url_data = get_url_from(url, 0, aggregate) - try: - add_intern_pattern(url_data, config) - except UnicodeError: - logit({}, env) - print_error(out, _("URL has unparsable domain name: %s") % \ - sys.exc_info()[1]) - return - aggregate.urlqueue.put(url_data) - director.check_urls(aggregate) + return config def get_host_name (form): @@ -165,7 +284,7 @@ def logit (form, env): _logfile.write(str(formvalue(form, key))+"\n") -def print_error (out, why): +def print_error (why): """Print standard error page.""" s = _(""" @@ -180,4 +299,5 @@ Errors are logged. """) % why - out.write(s.encode('iso-8859-1', 'ignore')) + return s.encode('iso-8859-1', 'ignore') + diff --git a/tests/test_cgi.py b/tests/test_cgi.py index 9266c8d8..9e343db5 100644 --- a/tests/test_cgi.py +++ b/tests/test_cgi.py @@ -18,59 +18,52 @@ Test cgi form routines. """ import unittest -import linkcheck.lc_cgi +import wsgiref +import urllib +from StringIO import StringIO +from wsgiref.util import setup_testing_defaults +from linkcheck.lc_cgi import checkform, checklink, LCFormError, application -class Store (object): - """ - Value storing class implementing FieldStorage interface. - """ - - def __init__ (self, value): - """ - Store given value. - """ - self.value = value - - -class TestCgi (unittest.TestCase): - """ - Test cgi routines. - """ +class TestWsgi (unittest.TestCase): + """Test wsgi application.""" def test_form_valid_url (self): # Check url validity. - form = {"url": Store("http://www.heise.de/"), - "level": Store("1"), - } - linkcheck.lc_cgi.checkform(form) + form = dict(url="http://www.example.com/", level="1") + checkform(form) def test_form_empty_url (self): # Check with empty url. - form = {"url": Store(""), - "level": Store("0"), - } - self.assertRaises(linkcheck.lc_cgi.LCFormError, - linkcheck.lc_cgi.checkform, form) + form = dict(url="", level="0") + self.assertRaises(LCFormError, checkform, form) def test_form_default_url (self): # Check with default url. - form = {"url": Store("http://"), - "level": Store("0"), - } - self.assertRaises(linkcheck.lc_cgi.LCFormError, - linkcheck.lc_cgi.checkform, form) + form = dict(url="http://", level="0") + self.assertRaises(LCFormError, checkform, form) def test_form_invalid_url (self): # Check url (in)validity. - form = {"url": Store("http://www.foo bar/"), - "level": Store("0"), - } - self.assertRaises(linkcheck.lc_cgi.LCFormError, - linkcheck.lc_cgi.checkform, form) + form = dict(url="http://www.foo bar/", level="0") + self.assertRaises(LCFormError, checkform, form) def test_checklink (self): - form = {"url": Store("http://www.heise.de/"), - "level": Store("0"), - } - linkcheck.lc_cgi.checklink(form=form) + form = dict(url="http://www.example.com/", level="0") + checklink(form) + + def test_application (self): + form = dict(url="http://www.example.com/", level="0") + formdata = urllib.urlencode(form) + environ = {'wsgi.input': StringIO(formdata)} + setup_testing_defaults(environ) + test_response = "" + test_headers = [None] + test_status = [None] + def start_response(status, headers): + test_status[0] = status + test_headers[0] = headers + for chunk in application(environ, start_response): + test_response += chunk + self.assertEqual(test_status[0], '200 OK') + self.assertTrue("Generated by LinkChecker" in test_response)