From b17648a3db97eac510127f721711b4021bcd28e9 Mon Sep 17 00:00:00 2001 From: calvin Date: Sun, 26 Mar 2000 18:53:23 +0000 Subject: [PATCH] See ChangeLog git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@44 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- ChangeLog | 4 +++ README | 2 ++ lc.cgi | 53 ++++------------------------------- lc.fcgi | 64 ++++++++----------------------------------- lc.sz_fcgi | 38 +++++++++++++++++++++++++ linkcheck/__init__.py | 2 +- linkcheck/lc_cgi.py | 43 +++++++++++++++++++++++++++++ sz_fcgi.py | 33 ++++------------------ 8 files changed, 110 insertions(+), 129 deletions(-) create mode 100644 lc.sz_fcgi create mode 100644 linkcheck/lc_cgi.py diff --git a/ChangeLog b/ChangeLog index 75bb9006..f750d241 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +26.3.2000 + * FastCGI fixes + * simplified sz_fcgi.py + 25.3.2000 * FastCGI script fixes * do not close the logging file descriptor diff --git a/README b/README index 21dd1a0f..490abb7e 100644 --- a/README +++ b/README @@ -41,3 +41,5 @@ fcgi.py and sz_fcgi.py from http://saarland.sz-sb.de/~ajung/sz_fcgi/ Note that the following packages are modified by me: httplib.py (renamed to http11lib.py) distutils +fcgi.py +sz_fcgi.py diff --git a/lc.cgi b/lc.cgi index a6e785e9..33812c0c 100755 --- a/lc.cgi +++ b/lc.cgi @@ -4,57 +4,15 @@ import re,cgi,sys,urlparse,time,os # configuration sys.stderr = sys.stdout -cgi_dir = "/home/calvin/public_html/cgi-bin" dist_dir = "/home/calvin/projects/linkchecker" sys.path.insert(0,dist_dir) -cgi.logfile = cgi_dir + "/linkchecker.log" # must be an existing file +cgi.logfile = "linkchecker.log" # must be an existing file # end configuration def testit(): cgi.test() sys.exit(0) -def checkform(form): - for key in ["level","url"]: - if not form.has_key(key) or form[key].value == "": return 0 - if not re.match(r"^http://[-\w./~]+$", form["url"].value): return 0 - if not re.match(r"\d", form["level"].value): return 0 - if int(form["level"].value) > 3: return 0 - if form.has_key("anchors"): - if not form["anchors"].value=="on": return 0 - if form.has_key("errors"): - if not form["errors"].value=="on": return 0 - if form.has_key("intern"): - if not form["intern"].value=="on": return 0 - return 1 - -def getHostName(): - return urlparse.urlparse(form["url"].value)[1] - -def logit(form): - cgi.log("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))) - for var in ["HTTP_USER_AGENT","REMOTE_ADDR","REMOTE_HOST","REMOTE_PORT"]: - if os.environ.has_key(var): - cgi.log(var+"="+os.environ[var]) - for key in ["level","url","anchors","errors","intern"]: - if form.has_key(key): - cgi.log(str(form[key])) - -def printError(): - print """ - -
-Error
-The LinkChecker Online script has encountered an error. Please ensure -that your provided URL link begins with http:// and -contains only these characters: A-Za-z0-9./_~-

-Errors are logged. -
- - -""" - import linkcheck # main @@ -63,9 +21,9 @@ print "Cache-Control: no-cache" print #testit() form = cgi.FieldStorage() -if not checkform(form): - logit(form) - printError() +if not linkcheck.lc_cgi.checkform(form): + linkcheck.lc_cgi.logit(form) + linkcheck.lc_cgi.printError(sys.stdout) sys.exit(0) config = linkcheck.Config.Configuration() config["recursionlevel"] = int(form["level"].value) @@ -73,7 +31,8 @@ config["log"] = linkcheck.Logging.HtmlLogger() if form.has_key("anchors"): config["anchors"] = 1 if not form.has_key("errors"): config["verbose"] = 1 if form.has_key("intern"): - config["internlinks"].append(re.compile("^(ftp|https?)://"+getHostName())) + config["internlinks"].append(re.compile("^(ftp|https?)://"+\ + linkcheck.lc_cgi.getHostName(form))) else: config["internlinks"].append(re.compile(".+")) # avoid checking of local files diff --git a/lc.fcgi b/lc.fcgi index 015a411b..b2938233 100644 --- a/lc.fcgi +++ b/lc.fcgi @@ -1,74 +1,32 @@ #!/usr/bin/env python -import re, sys, time, os, urlparse - -def checkform(form): - for key in ["level","url"]: - if not form.has_key(key) or form[key].value == "": return 0 - if not re.match(r"^http://[-\w./~]+$", form["url"].value): return 0 - if not re.match(r"\d", form["level"].value): return 0 - if int(form["level"].value) > 3: return 0 - if form.has_key("anchors"): - if not form["anchors"].value=="on": return 0 - if form.has_key("errors"): - if not form["errors"].value=="on": return 0 - if form.has_key("intern"): - if not form["intern"].value=="on": return 0 - return 1 - -def getHostName(): - return urlparse.urlparse(form["url"].value)[1] - -def logit(form, req): - log = open("linkchecker.log","a") - log.write("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))) - for var in ["HTTP_USER_AGENT","REMOTE_ADDR","REMOTE_HOST","REMOTE_PORT"]: - if req.env.has_key(var): - log.write(var+"="+req.env[var]) - for key in ["level","url","anchors","errors","intern"]: - if form.has_key(key): - log.write(str(form[key])) - log.close() - -def printError(req): - req.out.write(""" - -
-Error
-The LinkChecker Online script has encountered an error. Please ensure -that your provided URL link begins with http:// and -contains only these characters: A-Za-z0-9./_~-

-Errors are logged. -
- -""") - req.Finish() - +import sys, re dist_dir = "/home/calvin/projects/linkchecker" sys.path.insert(0,dist_dir) import fcgi, linkcheck # main try: - while isFCGI(): - req = FCGI() + while fcgi.isFCGI(): + req = fcgi.FCGI() req.out.write("Content-type: text/html\r\n" "Cache-Control: no-cache\r\n" "\r\n") form = req.getFieldStorage() - if not checkform(form): - logit(form, req) - printError(req) + if not linkcheck.lc_cgi.checkform(form): + linkcheck.lc_cgi.logit(form, req.env) + linkcheck.lc_cgi.printError(req.out) + req.Finish() continue config = linkcheck.Config.Configuration() config["recursionlevel"] = int(form["level"].value) - config["log"] = linkcheck.Logging.HtmlLogger() + config["log"] = linkcheck.Logging.HtmlLogger(req.out) config.disableThreading() if form.has_key("anchors"): config["anchors"] = 1 if not form.has_key("errors"): config["verbose"] = 1 if form.has_key("intern"): - config["internlinks"].append(re.compile("^(ftp|https?)://"+getHostName())) + config["internlinks"].append(re.compile("^(ftp|https?)://"+\ + linkcheck.lc_cgi.getHostName(form))) else: config["internlinks"].append(re.compile(".+")) # avoid checking of local files @@ -79,5 +37,5 @@ try: req.Finish() except: import traceback - traceback.print_exc(file = open('traceback', 'w')) + traceback.print_exc(file = open('traceback', 'a')) diff --git a/lc.sz_fcgi b/lc.sz_fcgi new file mode 100644 index 00000000..0ab02de6 --- /dev/null +++ b/lc.sz_fcgi @@ -0,0 +1,38 @@ +#!/usr/bin/env python +import sys,re,thread + +dist_dir = "/home/calvin/projects/linkchecker" +sys.path.insert(0,dist_dir) +import sz_fcgi, linkcheck + +def func(fcg, req): + req.out.write("Content-type: text/html\r\n" + "Cache-Control: no-cache\r\n" + "\r\n") + form = req.getFieldStorage() + if not linkcheck.lc_cgi.checkform(form): + linkcheck.lc_cgi.logit(form, req.env) + linkcheck.lc_cgi.printError(req.out) + req.Finish() + thread.exit() + config = linkcheck.Config.Configuration() + config["recursionlevel"] = int(form["level"].value) + config["log"] = linkcheck.Logging.HtmlLogger(req.out) + config.disableThreading() + if form.has_key("anchors"): config["anchors"] = 1 + if not form.has_key("errors"): config["verbose"] = 1 + if form.has_key("intern"): + config["internlinks"].append(re.compile("^(ftp|https?)://"+\ + linkcheck.lc_cgi.getHostName(form))) + else: + config["internlinks"].append(re.compile(".+")) + # avoid checking of local files + config["externlinks"].append((re.compile("^file:"), 1)) + # start checking + config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(form["url"].value, 0)) + linkcheck.checkUrls(config) + req.Finish() + thread.exit() + +# main +sz_fcgi.SZ_FCGI(func).run() diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index 13774fcc..5e5f6c11 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -1,6 +1,6 @@ # __init__.py for this module -import Config,UrlData,OutputReader,sys +import Config,UrlData,OutputReader,sys,lc_cgi def checkUrls(config = Config.Configuration()): config.log_init() diff --git a/linkcheck/lc_cgi.py b/linkcheck/lc_cgi.py new file mode 100644 index 00000000..c25d7831 --- /dev/null +++ b/linkcheck/lc_cgi.py @@ -0,0 +1,43 @@ +import re,time,urlparse + +def checkform(form): + for key in ["level","url"]: + if not form.has_key(key) or form[key].value == "": return 0 + if not re.match(r"^http://[-\w./~]+$", form["url"].value): return 0 + if not re.match(r"\d", form["level"].value): return 0 + if int(form["level"].value) > 3: return 0 + if form.has_key("anchors"): + if not form["anchors"].value=="on": return 0 + if form.has_key("errors"): + if not form["errors"].value=="on": return 0 + if form.has_key("intern"): + if not form["intern"].value=="on": return 0 + return 1 + +def getHostName(form): + return urlparse.urlparse(form["url"].value)[1] + +def logit(form, env): + log = open("linkchecker.log","a") + log.write("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))+"\n") + for var in ["HTTP_USER_AGENT","REMOTE_ADDR","REMOTE_HOST","REMOTE_PORT"]: + if env.has_key(var): + log.write(var+"="+env[var]+"\n") + for key in ["level","url","anchors","errors","intern"]: + if form.has_key(key): + log.write(str(form[key])+"\n") + log.close() + +def printError(out): + out.write(""" + +
+Error
+The LinkChecker Online script has encountered an error. Please ensure +that your provided URL link begins with http:// and +contains only these characters: A-Za-z0-9./_~-

+Errors are logged. +
+ +""") diff --git a/sz_fcgi.py b/sz_fcgi.py index 5e9f4520..a5cb9196 100644 --- a/sz_fcgi.py +++ b/sz_fcgi.py @@ -6,38 +6,15 @@ __doc__ = "Multithreaded FastCGI Wrapper" import sys,thread,fcgi class SZ_FCGI: - def __init__(self,func): self.func = func - self.handles = {} # create a new thread to handle requests def run(self): - while fcgi.isFCGI(): - req = fcgi.FCGI() - thread.start_new_thread(self.handle_request,(req,0)) - - # Finish thread and send all data back to the FCGI parent - def finish(self): - req = self.handles[thread.get_ident()] - req.Finish() - thread.exit() - - # Call function - handled by a thread - def handle_request(self,*args): - req = args[0] - self.handles[thread.get_ident()] = req try: - self.func(self,req.env,req.getFieldStorage()) + while fcgi.isFCGI(): + req = fcgi.FCGI() + thread.start_new_thread(self.func,(self, req)) except: - write_traceback() - - # Our own FCGI print routine - def print(self,*args): - req = self.handles[thread.get_ident()] - for s in args: - req.out.write(str(s)) - req.out.flush() - - def println(self,*args): - req = self.handles[thread.get_ident()] + import traceback + traceback.print_exc(file = open('traceback', 'a'))