From d3d3529c2378b48e8792f836fba136cea7cf1d3b Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Fri, 6 Apr 2012 08:23:41 +0200 Subject: [PATCH] Added a WSGI script. --- cgi-bin/lc.wsgi | 45 +++++++++++++++++++++++++++++++++ cgi-bin/lconline/lc_cgi.html.de | 2 +- cgi-bin/lconline/lc_cgi.html.en | 2 +- config/linkchecker.apache2.conf | 9 +++++-- debian/control | 1 + debian/rules | 2 +- doc/changelog.txt | 5 +++- linkcheck/lc_cgi.py | 44 ++++++++++++++++++++++---------- 8 files changed, 90 insertions(+), 20 deletions(-) create mode 100644 cgi-bin/lc.wsgi diff --git a/cgi-bin/lc.wsgi b/cgi-bin/lc.wsgi new file mode 100644 index 00000000..d9f4b84a --- /dev/null +++ b/cgi-bin/lc.wsgi @@ -0,0 +1,45 @@ +#!/usr/bin/python +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2012 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+
+from cStringIO import StringIO
+import cgi
+import linkcheck
+import linkcheck.lc_cgi
+
+def application(environ, start_response):
+    """WSGI entry point: run the link check for the submitted form."""
+    # CONTENT_LENGTH may be empty, missing or negative; read(-1) reads to EOF
+    try:
+        request_body_size = max(0, int(environ.get('CONTENT_LENGTH', 0)))
+    except ValueError:
+        request_body_size = 0
+
+    # When the method is POST the query string will be sent
+    # in the HTTP request body which is passed by the WSGI server
+    # in the file-like wsgi.input environment variable.
+    request_body = environ['wsgi.input'].read(request_body_size)
+    form = cgi.parse_qs(request_body)
+
+    status = '200 OK'
+    start_response(status, linkcheck.lc_cgi.get_response_headers())
+    output = StringIO()
+    # XXX this is slow since it checks the whole site before showing
+    # any output. Instead check in a separate thread and yield output
+    # as soon as it is available.
+    linkcheck.lc_cgi.checklink(form=form, out=output, env=environ)
+    return [output.getvalue()]
diff --git a/cgi-bin/lconline/lc_cgi.html.de b/cgi-bin/lconline/lc_cgi.html.de
index 4edb77c2..ae2551d6 100644
--- a/cgi-bin/lconline/lc_cgi.html.de
+++ b/cgi-bin/lconline/lc_cgi.html.de
@@ -28,7 +28,7 @@ target="_top">LinkChecker)
-
diff --git a/cgi-bin/lconline/lc_cgi.html.en b/cgi-bin/lconline/lc_cgi.html.en index 5f3c60d0..559c7db1 100644 --- a/cgi-bin/lconline/lc_cgi.html.en +++ b/cgi-bin/lconline/lc_cgi.html.en @@ -22,7 +22,7 @@ target="_top">LinkChecker)
- diff --git a/config/linkchecker.apache2.conf b/config/linkchecker.apache2.conf index faefd433..5dd8c99e 100644 --- a/config/linkchecker.apache2.conf +++ b/config/linkchecker.apache2.conf @@ -1,4 +1,7 @@ -ScriptAlias /lconline/lc.cgi /usr/lib/cgi-bin/lc.cgi + + +WSGIScriptAlias /lconlinewsgi/lc.wsgi /usr/lib/cgi-bin/lc.wsgi + Alias /lconline/ /usr/share/linkchecker/lconline/ # allow people to read the files @@ -8,7 +11,7 @@ Alias /lconline/ /usr/share/linkchecker/lconline/ Allow from all - + Order deny,allow Deny from all Allow from 127.0.0.0/255.0.0.0 @@ -18,3 +21,5 @@ Alias /lconline/ /usr/share/linkchecker/lconline/ # IPv6 addresses work only with apache2 Allow from ::1/128 + + diff --git a/debian/control b/debian/control index cda30f2b..88d85428 100644 --- a/debian/control +++ b/debian/control @@ -19,6 +19,7 @@ Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends} Provides: ${python:Provides} Conflicts: python-dnspython Suggests: apache2 | httpd, + libapache2-mod-wsgi, clamav-daemon, linkchecker-gui, python-cssutils, diff --git a/debian/rules b/debian/rules index f053fed7..d9fef737 100755 --- a/debian/rules +++ b/debian/rules @@ -37,7 +37,7 @@ override_dh_auto_install: install -m 644 cgi-bin/lconline/*.js $(HTMLDIR) install -m 644 cgi-bin/lconline/*.css $(HTMLDIR) install -m 644 cgi-bin/lconline/*.ico $(HTMLDIR) - install -m 755 cgi-bin/lc.cgi $(CGIDIR) + install -m 755 cgi-bin/lc.wsgi $(CGIDIR) # install bash_completion script install -m 644 config/linkchecker-completion $(ETCDIR)/bash_completion.d # install .mo files diff --git a/doc/changelog.txt b/doc/changelog.txt index c88c8c30..a5117266 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -4,9 +4,12 @@ Fixes: - checking: Detect invalid empty cookie values. Patch by Charles Jones. Closes: SF bug #3514219 -- checking: fix cache key for URL connections on redirect +- checking: Fix cache key for URL connections on redirect. 
Closes: SF bug #3514748 +Features: +- cgi: Added a WSGI script to replace the CGI script. + 7.6 "Türkisch für Anfänger" (released 31.03.2012) diff --git a/linkcheck/lc_cgi.py b/linkcheck/lc_cgi.py index 13df043b..049bf463 100644 --- a/linkcheck/lc_cgi.py +++ b/linkcheck/lc_cgi.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2011 Bastian Kleineidam +# Copyright (C) 2000-2012 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ -Common CGI functions used by the CGI scripts. +Common functions used by the CGI and WSGI scripts. """ import sys @@ -45,14 +45,30 @@ class LCFormError (StandardError): pass +def get_response_headers(): + return [("Content-type", "text/html"), + ("Cache-Control", "no-cache"), + ("Pragma:", "no-cache") + ] + def startoutput (out=None): """Print leading HTML headers to given output stream.""" if out is None: out = i18n.get_encoded_writer() - out.write("Content-type: text/html\r\n" - "Cache-Control: no-cache\r\n" - "Pragma: no-cache\r\n" - "\r\n") + for key, value in get_response_headers(): + out.write("%s: %s\r\n" % (key, value)) + out.write("\r\n") + + +def formvalue (form, key): + field = form.get(key) + if field is not None and hasattr(field, 'value'): + # it's a CGI FormField + field = field.value + else: + # assume WSGI dictionary lists + field = field[0] + return field def checklink (out=None, form=None, env=os.environ): @@ -68,7 +84,7 @@ def checklink (out=None, form=None, env=os.environ): print_error(out, why) return config = configuration.Configuration() - config["recursionlevel"] = int(form["level"].value) + config["recursionlevel"] = int(formvalue(form, "level")) config["logger"] = config.logger_new('html', fd=out) config["threads"] = 0 if "anchors" 
in form: @@ -81,7 +97,7 @@ def checklink (out=None, form=None, env=os.environ): # start checking aggregate = director.get_aggregate(config) get_url_from = checker.get_url_from - url = strformat.stripurl(form["url"].value) + url = strformat.stripurl(formvalue(form, "url")) url_data = get_url_from(url, 0, aggregate) try: add_intern_pattern(url_data, config) @@ -96,7 +112,7 @@ def checklink (out=None, form=None, env=os.environ): def get_host_name (form): """Return host name of given URL.""" - return urlparse.urlparse(form["url"].value)[1] + return urlparse.urlparse(formvalue(form, "url"))[1] def checkform (form): @@ -105,7 +121,7 @@ def checkform (form): only plain strings as exception text.""" # check lang support if "language" in form: - lang = form['language'].value + lang = formvalue(form, 'language') if lang in _supported_langs: locale.setlocale(locale.LC_ALL, lang_locale[lang]) init_i18n() @@ -113,7 +129,7 @@ def checkform (form): raise LCFormError(_("unsupported language")) # check url syntax if "url" in form: - url = form["url"].value + url = formvalue(form, "url") if not url or url == "http://": raise LCFormError(_("empty url was given")) if not urlutil.is_safe_url(url): @@ -122,13 +138,13 @@ def checkform (form): raise LCFormError(_("no url was given")) # check recursion level if "level" in form: - level = form["level"].value + level = formvalue(form, "level") if not _is_level(level): raise LCFormError(_("invalid recursion level")) # check options for option in ("anchors", "errors", "intern"): if option in form: - if not form[option].value == "on": + if not formvalue(form, option) == "on": raise LCFormError(_("invalid %s option syntax") % option) @@ -146,7 +162,7 @@ def logit (form, env): _logfile.write(var+"="+env[var]+"\n") for key in ("level", "url", "anchors", "errors", "intern", "language"): if key in form: - _logfile.write(str(form[key])+"\n") + _logfile.write(str(formvalue(form, key))+"\n") def print_error (out, why):