See ChangeLog

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@44 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-03-26 18:53:23 +00:00
parent e690c427a3
commit b17648a3db
8 changed files with 110 additions and 129 deletions

View file

@ -1,3 +1,7 @@
26.3.2000
* FastCGI fixes
* simplified sz_fcgi.py
25.3.2000
* FastCGI script fixes
* do not close the logging file descriptor

2
README
View file

@ -41,3 +41,5 @@ fcgi.py and sz_fcgi.py from http://saarland.sz-sb.de/~ajung/sz_fcgi/
Note that the following packages are modified by me:
httplib.py (renamed to http11lib.py)
distutils
fcgi.py
sz_fcgi.py

53
lc.cgi
View file

@ -4,57 +4,15 @@ import re,cgi,sys,urlparse,time,os
# configuration
sys.stderr = sys.stdout
cgi_dir = "/home/calvin/public_html/cgi-bin"
dist_dir = "/home/calvin/projects/linkchecker"
sys.path.insert(0,dist_dir)
cgi.logfile = cgi_dir + "/linkchecker.log" # must be an existing file
cgi.logfile = "linkchecker.log" # must be an existing file
# end configuration
def testit():
    """Run the cgi module's built-in self test, then terminate the script."""
    # Debug aid: cgi.test() produces the diagnostic output; nothing after
    # it should run, so exit with a success status.
    cgi.test()
    sys.exit(0)
def checkform(form):
    """Validate the submitted form; return 1 if it is acceptable, else 0.

    form -- mapping-like object supporting has_key() and indexing, whose
            items expose the submitted text as a .value attribute

    Accepted input:
      * "level" and "url" are present and non-empty
      * "url" starts with http:// followed only by word characters
        and the characters - . / ~
      * "level" consists of digits only and is not greater than 3
      * "anchors", "errors" and "intern", when present, equal "on"
    """
    for key in ["level", "url"]:
        if not form.has_key(key) or form[key].value == "":
            return 0
    # Anchor with \Z rather than $: "$" also matches just before a trailing
    # newline, which would let "http://host/\n" through.
    if not re.match(r"^http://[-\w./~]+\Z", form["url"].value):
        return 0
    # The whole value must be digits; checking only the first character
    # would let e.g. "1abc" reach int() below and raise ValueError.
    if not re.match(r"\d+\Z", form["level"].value):
        return 0
    if int(form["level"].value) > 3:
        return 0
    # Optional checkbox fields may only carry the value "on".
    for key in ["anchors", "errors", "intern"]:
        if form.has_key(key) and form[key].value != "on":
            return 0
    return 1
def getHostName():
    """Return the network-location component of the submitted URL.

    Reads the module-level ``form`` object, so it may only be called
    after the form has been parsed.
    """
    url = form["url"].value
    pieces = urlparse.urlparse(url)
    # index 1 of the urlparse() result is the network location
    return pieces[1]
def logit(form):
    """Record a request in the cgi module's log file.

    Logs a timestamp, those of the HTTP_USER_AGENT, REMOTE_ADDR,
    REMOTE_HOST and REMOTE_PORT environment variables that are set, and
    the submitted level/url/anchors/errors/intern form fields.

    form -- mapping-like form object supporting has_key() and indexing
    """
    # cgi.log() uses its first argument as a %-format string.  Everything
    # logged here originates from the client, so hand it over as an
    # argument to a fixed "%s" format; otherwise a "%" in the URL or the
    # user agent string would raise inside the logger.
    stamp = time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))
    cgi.log("%s", "\n" + stamp)
    for var in ["HTTP_USER_AGENT", "REMOTE_ADDR", "REMOTE_HOST", "REMOTE_PORT"]:
        if os.environ.has_key(var):
            cgi.log("%s", var + "=" + os.environ[var])
    for key in ["level", "url", "anchors", "errors", "intern"]:
        if form.has_key(key):
            cgi.log("%s", str(form[key]))
def printError():
print """<html><head></head>
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
alink="#191c83">
<blockquote>
<b>Error</b><br>
The LinkChecker Online script has encountered an error. Please ensure
that your provided URL link begins with <code>http://</code> and
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
Errors are logged.
</blockquote>
</body>
</html>
"""
import linkcheck
# main
@ -63,9 +21,9 @@ print "Cache-Control: no-cache"
print
#testit()
form = cgi.FieldStorage()
if not checkform(form):
logit(form)
printError()
if not linkcheck.lc_cgi.checkform(form):
linkcheck.lc_cgi.logit(form)
linkcheck.lc_cgi.printError(sys.stdout)
sys.exit(0)
config = linkcheck.Config.Configuration()
config["recursionlevel"] = int(form["level"].value)
@ -73,7 +31,8 @@ config["log"] = linkcheck.Logging.HtmlLogger()
if form.has_key("anchors"): config["anchors"] = 1
if not form.has_key("errors"): config["verbose"] = 1
if form.has_key("intern"):
config["internlinks"].append(re.compile("^(ftp|https?)://"+getHostName()))
config["internlinks"].append(re.compile("^(ftp|https?)://"+\
linkcheck.lc_cgi.getHostName(form)))
else:
config["internlinks"].append(re.compile(".+"))
# avoid checking of local files

64
lc.fcgi
View file

@ -1,74 +1,32 @@
#!/usr/bin/env python
import re, sys, time, os, urlparse
def checkform(form):
    """Validate the submitted form; return 1 if it is acceptable, else 0.

    form -- mapping-like object supporting has_key() and indexing, whose
            items expose the submitted text as a .value attribute

    Accepted input:
      * "level" and "url" are present and non-empty
      * "url" starts with http:// followed only by word characters
        and the characters - . / ~
      * "level" consists of digits only and is not greater than 3
      * "anchors", "errors" and "intern", when present, equal "on"
    """
    for key in ["level", "url"]:
        if not form.has_key(key) or form[key].value == "":
            return 0
    # Anchor with \Z rather than $: "$" also matches just before a trailing
    # newline, which would let "http://host/\n" through.
    if not re.match(r"^http://[-\w./~]+\Z", form["url"].value):
        return 0
    # The whole value must be digits; checking only the first character
    # would let e.g. "1abc" reach int() below and raise ValueError.
    if not re.match(r"\d+\Z", form["level"].value):
        return 0
    if int(form["level"].value) > 3:
        return 0
    # Optional checkbox fields may only carry the value "on".
    for key in ["anchors", "errors", "intern"]:
        if form.has_key(key) and form[key].value != "on":
            return 0
    return 1
def getHostName():
    """Return the network-location component of the submitted URL.

    Reads the module-level ``form`` object, so it may only be called
    after the form has been fetched from the request.
    """
    url = form["url"].value
    pieces = urlparse.urlparse(url)
    # index 1 of the urlparse() result is the network location
    return pieces[1]
def logit(form, req):
    """Append a record of the request to the file "linkchecker.log".

    The record is a blank line, a timestamp, those of the HTTP_USER_AGENT,
    REMOTE_ADDR, REMOTE_HOST and REMOTE_PORT entries present in req.env,
    and the submitted level/url/anchors/errors/intern form fields -- one
    item per line.

    form -- mapping-like form object supporting has_key() and indexing
    req  -- request object whose .env attribute is a mapping supporting
            has_key() and indexing
    """
    log = open("linkchecker.log", "a")
    try:
        stamp = time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))
        # Terminate every item with a newline; without it all fields of a
        # record run together on a single unreadable line.
        log.write("\n" + stamp + "\n")
        for var in ["HTTP_USER_AGENT", "REMOTE_ADDR", "REMOTE_HOST", "REMOTE_PORT"]:
            if req.env.has_key(var):
                log.write(var + "=" + req.env[var] + "\n")
        for key in ["level", "url", "anchors", "errors", "intern"]:
            if form.has_key(key):
                log.write(str(form[key]) + "\n")
    finally:
        # close the file even if one of the writes above fails
        log.close()
def printError(req):
    """Write the static HTML error page to req.out and finish the request."""
    page = """<html><head></head>
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
alink="#191c83">
<blockquote>
<b>Error</b><br>
The LinkChecker Online script has encountered an error. Please ensure
that your provided URL link begins with <code>http://</code> and
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
Errors are logged.
</blockquote>
</body>
</html>"""
    req.out.write(page)
    req.Finish()
import sys, re
dist_dir = "/home/calvin/projects/linkchecker"
sys.path.insert(0,dist_dir)
import fcgi, linkcheck
# main
try:
while isFCGI():
req = FCGI()
while fcgi.isFCGI():
req = fcgi.FCGI()
req.out.write("Content-type: text/html\r\n"
"Cache-Control: no-cache\r\n"
"\r\n")
form = req.getFieldStorage()
if not checkform(form):
logit(form, req)
printError(req)
if not linkcheck.lc_cgi.checkform(form):
linkcheck.lc_cgi.logit(form, req.env)
linkcheck.lc_cgi.printError(req.out)
req.Finish()
continue
config = linkcheck.Config.Configuration()
config["recursionlevel"] = int(form["level"].value)
config["log"] = linkcheck.Logging.HtmlLogger()
config["log"] = linkcheck.Logging.HtmlLogger(req.out)
config.disableThreading()
if form.has_key("anchors"): config["anchors"] = 1
if not form.has_key("errors"): config["verbose"] = 1
if form.has_key("intern"):
config["internlinks"].append(re.compile("^(ftp|https?)://"+getHostName()))
config["internlinks"].append(re.compile("^(ftp|https?)://"+\
linkcheck.lc_cgi.getHostName(form)))
else:
config["internlinks"].append(re.compile(".+"))
# avoid checking of local files
@ -79,5 +37,5 @@ try:
req.Finish()
except:
import traceback
traceback.print_exc(file = open('traceback', 'w'))
traceback.print_exc(file = open('traceback', 'a'))

38
lc.sz_fcgi Normal file
View file

@ -0,0 +1,38 @@
#!/usr/bin/env python
import sys,re,thread
dist_dir = "/home/calvin/projects/linkchecker"
sys.path.insert(0,dist_dir)
import sz_fcgi, linkcheck
def func(fcg, req):
    """Serve one LinkChecker request and then end the calling thread.

    Writes the HTTP headers, validates the submitted form and either
    emits the error page (logging the rejected request) or configures
    and runs a link check whose HTML report goes to req.out.

    fcg -- first argument supplied by the caller; not used here
    req -- request object providing out, env, getFieldStorage() and
           Finish()
    """
    req.out.write("Content-type: text/html\r\n"
                  "Cache-Control: no-cache\r\n"
                  "\r\n")
    form = req.getFieldStorage()
    if not linkcheck.lc_cgi.checkform(form):
        linkcheck.lc_cgi.logit(form, req.env)
        linkcheck.lc_cgi.printError(req.out)
        req.Finish()
        thread.exit()
    config = linkcheck.Config.Configuration()
    config["recursionlevel"] = int(form["level"].value)
    config["log"] = linkcheck.Logging.HtmlLogger(req.out)
    config.disableThreading()
    if form.has_key("anchors"):
        config["anchors"] = 1
    if not form.has_key("errors"):
        config["verbose"] = 1
    if form.has_key("intern"):
        # Escape the host name before embedding it in the pattern: "." is
        # a regex metacharacter, so an unescaped "www.example.com" would
        # also match hosts such as "wwwXexample.com".
        host = linkcheck.lc_cgi.getHostName(form)
        config["internlinks"].append(
            re.compile("^(ftp|https?)://" + re.escape(host)))
    else:
        config["internlinks"].append(re.compile(".+"))
    # avoid checking of local files
    config["externlinks"].append((re.compile("^file:"), 1))
    # start checking
    config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(form["url"].value, 0))
    linkcheck.checkUrls(config)
    req.Finish()
    thread.exit()
# main
sz_fcgi.SZ_FCGI(func).run()

View file

@ -1,6 +1,6 @@
# __init__.py for this module
import Config,UrlData,OutputReader,sys
import Config,UrlData,OutputReader,sys,lc_cgi
def checkUrls(config = Config.Configuration()):
config.log_init()

43
linkcheck/lc_cgi.py Normal file
View file

@ -0,0 +1,43 @@
import re,time,urlparse
def checkform(form):
    """Validate the submitted form; return 1 if it is acceptable, else 0.

    form -- mapping-like object supporting has_key() and indexing, whose
            items expose the submitted text as a .value attribute

    Accepted input:
      * "level" and "url" are present and non-empty
      * "url" starts with http:// followed only by word characters
        and the characters - . / ~
      * "level" consists of digits only and is not greater than 3
      * "anchors", "errors" and "intern", when present, equal "on"
    """
    for key in ["level", "url"]:
        if not form.has_key(key) or form[key].value == "":
            return 0
    # Anchor with \Z rather than $: "$" also matches just before a trailing
    # newline, which would let "http://host/\n" through.
    if not re.match(r"^http://[-\w./~]+\Z", form["url"].value):
        return 0
    # The whole value must be digits; checking only the first character
    # would let e.g. "1abc" reach int() below and raise ValueError.
    if not re.match(r"\d+\Z", form["level"].value):
        return 0
    if int(form["level"].value) > 3:
        return 0
    # Optional checkbox fields may only carry the value "on".
    for key in ["anchors", "errors", "intern"]:
        if form.has_key(key) and form[key].value != "on":
            return 0
    return 1
def getHostName(form):
return urlparse.urlparse(form["url"].value)[1]
def logit(form, env):
    """Append a record of the request to the file "linkchecker.log".

    The record is a blank line, a timestamp, those of the HTTP_USER_AGENT,
    REMOTE_ADDR, REMOTE_HOST and REMOTE_PORT entries present in env, and
    the submitted level/url/anchors/errors/intern form fields -- one item
    per line.  The log file name is relative, so it is resolved against
    the current working directory.

    form -- mapping-like form object supporting has_key() and indexing
    env  -- mapping of request environment variables supporting
            has_key() and indexing
    """
    log = open("linkchecker.log", "a")
    try:
        stamp = time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))
        log.write("\n" + stamp + "\n")
        for var in ["HTTP_USER_AGENT", "REMOTE_ADDR", "REMOTE_HOST", "REMOTE_PORT"]:
            if env.has_key(var):
                log.write(var + "=" + env[var] + "\n")
        for key in ["level", "url", "anchors", "errors", "intern"]:
            if form.has_key(key):
                log.write(str(form[key]) + "\n")
    finally:
        # close the file even if one of the writes above fails, so the
        # handle is not leaked in a long-running server process
        log.close()
def printError(out):
    """Write the static HTML error page to the file-like object out."""
    page = """<html><head></head>
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
alink="#191c83">
<blockquote>
<b>Error</b><br>
The LinkChecker Online script has encountered an error. Please ensure
that your provided URL link begins with <code>http://</code> and
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
Errors are logged.
</blockquote>
</body>
</html>"""
    out.write(page)

View file

@ -6,38 +6,15 @@ __doc__ = "Multithreaded FastCGI Wrapper"
import sys,thread,fcgi
class SZ_FCGI:
def __init__(self,func):
self.func = func
self.handles = {}
# create a new thread to handle requests
def run(self):
while fcgi.isFCGI():
req = fcgi.FCGI()
thread.start_new_thread(self.handle_request,(req,0))
# Finish thread and send all data back to the FCGI parent
def finish(self):
req = self.handles[thread.get_ident()]
req.Finish()
thread.exit()
# Call function - handled by a thread
def handle_request(self,*args):
req = args[0]
self.handles[thread.get_ident()] = req
try:
self.func(self,req.env,req.getFieldStorage())
while fcgi.isFCGI():
req = fcgi.FCGI()
thread.start_new_thread(self.func,(self, req))
except:
write_traceback()
# Our own FCGI print routine
def print(self,*args):
req = self.handles[thread.get_ident()]
for s in args:
req.out.write(str(s))
req.out.flush()
def println(self,*args):
req = self.handles[thread.get_ident()]
import traceback
traceback.print_exc(file = open('traceback', 'a'))