mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-09 15:14:45 +00:00
See ChangeLog
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@44 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
e690c427a3
commit
b17648a3db
8 changed files with 110 additions and 129 deletions
|
|
@ -1,3 +1,7 @@
|
|||
26.3.2000
|
||||
* FastCGI fixes
|
||||
* simplified sz_fcgi.py
|
||||
|
||||
25.3.2000
|
||||
* FastCGI script fixes
|
||||
* do not close the logging file descriptor
|
||||
|
|
|
|||
2
README
2
README
|
|
@ -41,3 +41,5 @@ fcgi.py and sz_fcgi.py from http://saarland.sz-sb.de/~ajung/sz_fcgi/
|
|||
Note that the following packages are modified by me:
|
||||
httplib.py (renamed to http11lib.py)
|
||||
distutils
|
||||
fcgi.py
|
||||
sz_fcgi.py
|
||||
|
|
|
|||
53
lc.cgi
53
lc.cgi
|
|
@ -4,57 +4,15 @@ import re,cgi,sys,urlparse,time,os
|
|||
|
||||
# configuration
|
||||
sys.stderr = sys.stdout
|
||||
cgi_dir = "/home/calvin/public_html/cgi-bin"
|
||||
dist_dir = "/home/calvin/projects/linkchecker"
|
||||
sys.path.insert(0,dist_dir)
|
||||
cgi.logfile = cgi_dir + "/linkchecker.log" # must be an existing file
|
||||
cgi.logfile = "linkchecker.log" # must be an existing file
|
||||
# end configuration
|
||||
|
||||
def testit():
|
||||
cgi.test()
|
||||
sys.exit(0)
|
||||
|
||||
def checkform(form):
|
||||
for key in ["level","url"]:
|
||||
if not form.has_key(key) or form[key].value == "": return 0
|
||||
if not re.match(r"^http://[-\w./~]+$", form["url"].value): return 0
|
||||
if not re.match(r"\d", form["level"].value): return 0
|
||||
if int(form["level"].value) > 3: return 0
|
||||
if form.has_key("anchors"):
|
||||
if not form["anchors"].value=="on": return 0
|
||||
if form.has_key("errors"):
|
||||
if not form["errors"].value=="on": return 0
|
||||
if form.has_key("intern"):
|
||||
if not form["intern"].value=="on": return 0
|
||||
return 1
|
||||
|
||||
def getHostName():
|
||||
return urlparse.urlparse(form["url"].value)[1]
|
||||
|
||||
def logit(form):
|
||||
cgi.log("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time())))
|
||||
for var in ["HTTP_USER_AGENT","REMOTE_ADDR","REMOTE_HOST","REMOTE_PORT"]:
|
||||
if os.environ.has_key(var):
|
||||
cgi.log(var+"="+os.environ[var])
|
||||
for key in ["level","url","anchors","errors","intern"]:
|
||||
if form.has_key(key):
|
||||
cgi.log(str(form[key]))
|
||||
|
||||
def printError():
|
||||
print """<html><head></head>
|
||||
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
|
||||
alink="#191c83">
|
||||
<blockquote>
|
||||
<b>Error</b><br>
|
||||
The LinkChecker Online script has encountered an error. Please ensure
|
||||
that your provided URL link begins with <code>http://</code> and
|
||||
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
|
||||
Errors are logged.
|
||||
</blockquote>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
import linkcheck
|
||||
|
||||
# main
|
||||
|
|
@ -63,9 +21,9 @@ print "Cache-Control: no-cache"
|
|||
print
|
||||
#testit()
|
||||
form = cgi.FieldStorage()
|
||||
if not checkform(form):
|
||||
logit(form)
|
||||
printError()
|
||||
if not linkcheck.lc_cgi.checkform(form):
|
||||
linkcheck.lc_cgi.logit(form)
|
||||
linkcheck.lc_cgi.printError(sys.stdout)
|
||||
sys.exit(0)
|
||||
config = linkcheck.Config.Configuration()
|
||||
config["recursionlevel"] = int(form["level"].value)
|
||||
|
|
@ -73,7 +31,8 @@ config["log"] = linkcheck.Logging.HtmlLogger()
|
|||
if form.has_key("anchors"): config["anchors"] = 1
|
||||
if not form.has_key("errors"): config["verbose"] = 1
|
||||
if form.has_key("intern"):
|
||||
config["internlinks"].append(re.compile("^(ftp|https?)://"+getHostName()))
|
||||
config["internlinks"].append(re.compile("^(ftp|https?)://"+\
|
||||
linkcheck.lc_cgi.getHostName(form)))
|
||||
else:
|
||||
config["internlinks"].append(re.compile(".+"))
|
||||
# avoid checking of local files
|
||||
|
|
|
|||
64
lc.fcgi
64
lc.fcgi
|
|
@ -1,74 +1,32 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import re, sys, time, os, urlparse
|
||||
|
||||
def checkform(form):
|
||||
for key in ["level","url"]:
|
||||
if not form.has_key(key) or form[key].value == "": return 0
|
||||
if not re.match(r"^http://[-\w./~]+$", form["url"].value): return 0
|
||||
if not re.match(r"\d", form["level"].value): return 0
|
||||
if int(form["level"].value) > 3: return 0
|
||||
if form.has_key("anchors"):
|
||||
if not form["anchors"].value=="on": return 0
|
||||
if form.has_key("errors"):
|
||||
if not form["errors"].value=="on": return 0
|
||||
if form.has_key("intern"):
|
||||
if not form["intern"].value=="on": return 0
|
||||
return 1
|
||||
|
||||
def getHostName():
|
||||
return urlparse.urlparse(form["url"].value)[1]
|
||||
|
||||
def logit(form, req):
|
||||
log = open("linkchecker.log","a")
|
||||
log.write("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time())))
|
||||
for var in ["HTTP_USER_AGENT","REMOTE_ADDR","REMOTE_HOST","REMOTE_PORT"]:
|
||||
if req.env.has_key(var):
|
||||
log.write(var+"="+req.env[var])
|
||||
for key in ["level","url","anchors","errors","intern"]:
|
||||
if form.has_key(key):
|
||||
log.write(str(form[key]))
|
||||
log.close()
|
||||
|
||||
def printError(req):
|
||||
req.out.write("""<html><head></head>
|
||||
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
|
||||
alink="#191c83">
|
||||
<blockquote>
|
||||
<b>Error</b><br>
|
||||
The LinkChecker Online script has encountered an error. Please ensure
|
||||
that your provided URL link begins with <code>http://</code> and
|
||||
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
|
||||
Errors are logged.
|
||||
</blockquote>
|
||||
</body>
|
||||
</html>""")
|
||||
req.Finish()
|
||||
|
||||
import sys, re
|
||||
dist_dir = "/home/calvin/projects/linkchecker"
|
||||
sys.path.insert(0,dist_dir)
|
||||
import fcgi, linkcheck
|
||||
|
||||
# main
|
||||
try:
|
||||
while isFCGI():
|
||||
req = FCGI()
|
||||
while fcgi.isFCGI():
|
||||
req = fcgi.FCGI()
|
||||
req.out.write("Content-type: text/html\r\n"
|
||||
"Cache-Control: no-cache\r\n"
|
||||
"\r\n")
|
||||
form = req.getFieldStorage()
|
||||
if not checkform(form):
|
||||
logit(form, req)
|
||||
printError(req)
|
||||
if not linkcheck.lc_cgi.checkform(form):
|
||||
linkcheck.lc_cgi.logit(form, req.env)
|
||||
linkcheck.lc_cgi.printError(req.out)
|
||||
req.Finish()
|
||||
continue
|
||||
config = linkcheck.Config.Configuration()
|
||||
config["recursionlevel"] = int(form["level"].value)
|
||||
config["log"] = linkcheck.Logging.HtmlLogger()
|
||||
config["log"] = linkcheck.Logging.HtmlLogger(req.out)
|
||||
config.disableThreading()
|
||||
if form.has_key("anchors"): config["anchors"] = 1
|
||||
if not form.has_key("errors"): config["verbose"] = 1
|
||||
if form.has_key("intern"):
|
||||
config["internlinks"].append(re.compile("^(ftp|https?)://"+getHostName()))
|
||||
config["internlinks"].append(re.compile("^(ftp|https?)://"+\
|
||||
linkcheck.lc_cgi.getHostName(form)))
|
||||
else:
|
||||
config["internlinks"].append(re.compile(".+"))
|
||||
# avoid checking of local files
|
||||
|
|
@ -79,5 +37,5 @@ try:
|
|||
req.Finish()
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc(file = open('traceback', 'w'))
|
||||
traceback.print_exc(file = open('traceback', 'a'))
|
||||
|
||||
|
|
|
|||
38
lc.sz_fcgi
Normal file
38
lc.sz_fcgi
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python
|
||||
import sys,re,thread
|
||||
|
||||
dist_dir = "/home/calvin/projects/linkchecker"
|
||||
sys.path.insert(0,dist_dir)
|
||||
import sz_fcgi, linkcheck
|
||||
|
||||
def func(fcg, req):
    """Handle one FastCGI request: validate the form and run the link check.

    fcg -- the SZ_FCGI wrapper object that started this thread (unused here)
    req -- the FastCGI request; its out stream receives the HTTP header and
           the HTML report, its env mapping is passed on to the error log

    The function never returns normally: once the request is finished the
    handling thread is ended with thread.exit().
    """
    try:
        req.out.write("Content-type: text/html\r\n"
                      "Cache-Control: no-cache\r\n"
                      "\r\n")
        form = req.getFieldStorage()
        if not linkcheck.lc_cgi.checkform(form):
            # invalid input: record it and answer with the error page
            linkcheck.lc_cgi.logit(form, req.env)
            linkcheck.lc_cgi.printError(req.out)
        else:
            config = linkcheck.Config.Configuration()
            config["recursionlevel"] = int(form["level"].value)
            config["log"] = linkcheck.Logging.HtmlLogger(req.out)
            config.disableThreading()
            if form.has_key("anchors"):
                config["anchors"] = 1
            if not form.has_key("errors"):
                config["verbose"] = 1
            if form.has_key("intern"):
                # Escape the host name: it is literal text, and an unescaped
                # "." would let the pattern match unrelated hosts as well.
                host = re.escape(linkcheck.lc_cgi.getHostName(form))
                config["internlinks"].append(
                    re.compile("^(ftp|https?)://" + host))
            else:
                config["internlinks"].append(re.compile(".+"))
            # avoid checking of local files
            config["externlinks"].append((re.compile("^file:"), 1))
            # start checking
            config.appendUrl(
                linkcheck.UrlData.GetUrlDataFrom(form["url"].value, 0))
            linkcheck.checkUrls(config)
    finally:
        # Always hand the response back to the web server, even when the
        # check raised; otherwise the request would stay open.
        req.Finish()
    thread.exit()
|
||||
|
||||
# main
|
||||
sz_fcgi.SZ_FCGI(func).run()
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# __init__.py for this module
|
||||
|
||||
import Config,UrlData,OutputReader,sys
|
||||
import Config,UrlData,OutputReader,sys,lc_cgi
|
||||
|
||||
def checkUrls(config = Config.Configuration()):
|
||||
config.log_init()
|
||||
|
|
|
|||
43
linkcheck/lc_cgi.py
Normal file
43
linkcheck/lc_cgi.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import re,time,urlparse
|
||||
|
||||
def checkform(form):
    """Validate the submitted LinkChecker form.

    form is a cgi.FieldStorage-like mapping: it has to offer has_key() and
    return objects carrying a .value attribute.

    Returns 1 when the form is acceptable and 0 otherwise:
      - "level" and "url" must be present and non-empty
      - "url" must be an http:// URL made only of word characters
        and the characters - . / ~
      - "level" must be a decimal number not greater than 3
      - "anchors", "errors" and "intern" are optional; when present their
        value must be "on"
    """
    for key in ["level", "url"]:
        if not form.has_key(key) or form[key].value == "":
            return 0
    if not re.match(r"^http://[-\w./~]+$", form["url"].value):
        return 0
    # The whole value has to consist of digits. Matching only the first
    # character let input such as "1abc" through, and int() below then
    # raised ValueError instead of the form being rejected.
    if not re.match(r"^\d+$", form["level"].value):
        return 0
    if int(form["level"].value) > 3:
        return 0
    for key in ["anchors", "errors", "intern"]:
        if form.has_key(key) and form[key].value != "on":
            return 0
    return 1
|
||||
|
||||
def getHostName(form):
    """Return the network location (host, plus port if given) of form["url"]."""
    url = form["url"].value
    parts = urlparse.urlparse(url)
    # element 1 of the parse result is the network location
    return parts[1]
|
||||
|
||||
def logit(form, env):
    """Append a timestamped record of a request to the file linkchecker.log.

    form -- mapping with has_key(); the fields level, url, anchors, errors
            and intern are written (via str()) when present
    env  -- mapping with has_key() holding the request environment; the
            variables HTTP_USER_AGENT, REMOTE_ADDR, REMOTE_HOST and
            REMOTE_PORT are written as NAME=value when present

    The file name is relative, so the record goes to the current working
    directory of the process.
    """
    log = open("linkchecker.log", "a")
    try:
        stamp = time.strftime("%d.%m.%Y %H:%M:%S",
                              time.localtime(time.time()))
        log.write("\n" + stamp + "\n")
        for var in ["HTTP_USER_AGENT", "REMOTE_ADDR", "REMOTE_HOST",
                    "REMOTE_PORT"]:
            if env.has_key(var):
                log.write(var + "=" + env[var] + "\n")
        for key in ["level", "url", "anchors", "errors", "intern"]:
            if form.has_key(key):
                log.write(str(form[key]) + "\n")
    finally:
        # release the file descriptor even when one of the writes fails
        log.close()
|
||||
|
||||
def printError(out):
    """Write the static HTML error page to out.

    out is any object with a write() method, for example sys.stdout or the
    output stream of a FastCGI request.
    """
    page = """<html><head></head>
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
alink="#191c83">
<blockquote>
<b>Error</b><br>
The LinkChecker Online script has encountered an error. Please ensure
that your provided URL link begins with <code>http://</code> and
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
Errors are logged.
</blockquote>
</body>
</html>"""
    out.write(page)
|
||||
33
sz_fcgi.py
33
sz_fcgi.py
|
|
@ -6,38 +6,15 @@ __doc__ = "Multithreaded FastCGI Wrapper"
|
|||
import sys,thread,fcgi
|
||||
|
||||
class SZ_FCGI:
|
||||
|
||||
def __init__(self,func):
|
||||
self.func = func
|
||||
self.handles = {}
|
||||
|
||||
# create a new thread to handle requests
|
||||
def run(self):
|
||||
while fcgi.isFCGI():
|
||||
req = fcgi.FCGI()
|
||||
thread.start_new_thread(self.handle_request,(req,0))
|
||||
|
||||
# Finish thread and send all data back to the FCGI parent
|
||||
def finish(self):
|
||||
req = self.handles[thread.get_ident()]
|
||||
req.Finish()
|
||||
thread.exit()
|
||||
|
||||
# Call function - handled by a thread
|
||||
def handle_request(self,*args):
|
||||
req = args[0]
|
||||
self.handles[thread.get_ident()] = req
|
||||
try:
|
||||
self.func(self,req.env,req.getFieldStorage())
|
||||
while fcgi.isFCGI():
|
||||
req = fcgi.FCGI()
|
||||
thread.start_new_thread(self.func,(self, req))
|
||||
except:
|
||||
write_traceback()
|
||||
|
||||
# Our own FCGI print routine
|
||||
def print(self,*args):
|
||||
req = self.handles[thread.get_ident()]
|
||||
for s in args:
|
||||
req.out.write(str(s))
|
||||
req.out.flush()
|
||||
|
||||
def println(self,*args):
|
||||
req = self.handles[thread.get_ident()]
|
||||
import traceback
|
||||
traceback.print_exc(file = open('traceback', 'a'))
|
||||
|
|
|
|||
Loading…
Reference in a new issue