mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-26 00:54:43 +00:00
CGI work
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@25 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
262915f0af
commit
225a49df6d
8 changed files with 562 additions and 51 deletions
|
|
@ -1,3 +1,8 @@
|
|||
8.3.2000 Version 1.1.1
|
||||
* FastCGI modules added (no script yet)
|
||||
* CGI script fixes
|
||||
* supply strict/non-strict flag for each external filtering rule
|
||||
|
||||
7.3.2000
|
||||
* support for multiple user/password pairs
|
||||
|
||||
|
|
|
|||
439
fcgi.py
Normal file
439
fcgi.py
Normal file
|
|
@ -0,0 +1,439 @@
|
|||
#!/usr/bin/env python
|
||||
#------------------------------------------------------------------------
|
||||
# Copyright (c) 1998 by Total Control Software
|
||||
# All Rights Reserved
|
||||
#------------------------------------------------------------------------
|
||||
#
|
||||
# Module Name: fcgi.py
|
||||
#
|
||||
# Description: Handles communication with the FastCGI module of the
|
||||
# web server without using the FastCGI developers kit, but
|
||||
# will also work in a non-FastCGI environment, (straight CGI.)
|
||||
# This module was originally fetched from someplace on the
|
||||
# Net (I don't remember where and I can't find it now...) and
|
||||
# has been significantly modified to fix several bugs, be more
|
||||
# readable, more robust at handling large CGI data and return
|
||||
# document sizes, and also to fit the model that we had previously
|
||||
# used for FastCGI.
|
||||
#
|
||||
# WARNING: If you don't know what you are doing, don't tinker with this
|
||||
# module!
|
||||
#
|
||||
# Creation Date: 1/30/98 2:59:04PM
|
||||
#
|
||||
# License: This is free software. You may use this software for any
|
||||
# purpose including modification/redistribution, so long as
|
||||
# this header remains intact and that you do not claim any
|
||||
# rights of ownership or authorship of this software. This
|
||||
# software has been tested, but no warranty is expressed or
|
||||
# implied.
|
||||
#
|
||||
#------------------------------------------------------------------------
|
||||
|
||||
|
||||
import os, sys, string, socket, errno
|
||||
from cStringIO import StringIO
|
||||
import cgi
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
# FastCGI protocol constants.

# Concurrency limits of this implementation: one connection carrying one
# request at a time.
FCGI_MAX_REQS = 1
FCGI_MAX_CONNS = 1

# Supported version of the FastCGI protocol
FCGI_VERSION_1 = 1

# Boolean: can this application multiplex connections?
FCGI_MPXS_CONNS = 0

# Record types
FCGI_BEGIN_REQUEST = 1
FCGI_ABORT_REQUEST = 2
FCGI_END_REQUEST = 3
FCGI_PARAMS = 4
FCGI_STDIN = 5
FCGI_STDOUT = 6
FCGI_STDERR = 7
FCGI_DATA = 8
FCGI_GET_VALUES = 9
FCGI_GET_VALUES_RESULT = 10
FCGI_UNKNOWN_TYPE = 11
FCGI_MAXTYPE = FCGI_UNKNOWN_TYPE

# Record types that are management records
ManagementTypes = [FCGI_GET_VALUES]

# Request id carried by management records
FCGI_NULL_REQUEST_ID = 0

# Masks for the flags component of FCGI_BEGIN_REQUEST
FCGI_KEEP_CONN = 1

# Values for the role component of FCGI_BEGIN_REQUEST
FCGI_RESPONDER = 1
FCGI_AUTHORIZER = 2
FCGI_FILTER = 3

# Values for the protocolStatus component of FCGI_END_REQUEST
FCGI_REQUEST_COMPLETE = 0   # Request completed nicely
FCGI_CANT_MPX_CONN = 1      # This app can't multiplex
FCGI_OVERLOADED = 2         # New request rejected; too busy
FCGI_UNKNOWN_ROLE = 3       # Role value not known

# Module exception (a string exception, raised as "raise error, message")
error = 'fcgi.error'
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
# The following function is used during debugging; it isn't called
|
||||
# anywhere at the moment
|
||||
|
||||
def _error(msg):
    """Append msg, followed by a newline, to the debug log /tmp/err.

    Debugging aid only; nothing in this module calls it at the moment.
    """
    errf = open('/tmp/err', 'a+')
    try:
        errf.write(msg + '\n')
    finally:
        # Release the file even when the write itself fails.
        errf.close()
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
class record:
    """One FastCGI record: an 8-byte header followed by content and padding.

    Header layout as read and written below: version, record type,
    request id (2 bytes, high byte first), content length (2 bytes, high
    byte first), padding length, one reserved byte.

    Attributes always present: version, recType, reqId, content.
    Depending on recType, readRecord() adds and writeRecord() expects:
      FCGI_BEGIN_REQUEST              role, flags
      FCGI_UNKNOWN_TYPE               unknownType
      FCGI_GET_VALUES / FCGI_PARAMS   values (dict of name -> value)
      FCGI_END_REQUEST                appStatus, protocolStatus
    """

    def __init__(self):
        self.version = FCGI_VERSION_1
        self.recType = FCGI_UNKNOWN_TYPE
        self.reqId = FCGI_NULL_REQUEST_ID
        self.content = ""

    #----------------------------------------
    def _recvExact(self, sock, count):
        """Return exactly count bytes read from sock.

        recv() may hand back fewer bytes than requested, so keep reading
        until the amount is complete.  Raises EOFError when the peer
        closes the connection first; without that check an empty recv()
        result would make the caller loop forever.
        """
        buf = ""
        while len(buf) < count:
            data = sock.recv(count - len(buf))
            if not data:
                raise EOFError('FastCGI connection closed in mid-record')
            buf = buf + data
        return buf

    #----------------------------------------
    def readRecord(self, sock):
        """Read one record from sock and decode it into this object.

        Raises EOFError if the connection ends before the record is
        complete.
        """
        s = map(ord, self._recvExact(sock, 8))
        self.version, self.recType, paddingLength = s[0], s[1], s[6]
        self.reqId, contentLength = (s[2]<<8)+s[3], (s[4]<<8)+s[5]
        self.content = self._recvExact(sock, contentLength)
        if paddingLength != 0:
            # Padding carries no information; read it only to stay in sync.
            self._recvExact(sock, paddingLength)

        # Parse the content information
        c = self.content
        if self.recType == FCGI_BEGIN_REQUEST:
            self.role = (ord(c[0])<<8) + ord(c[1])
            self.flags = ord(c[2])

        elif self.recType == FCGI_UNKNOWN_TYPE:
            self.unknownType = ord(c[0])

        elif self.recType == FCGI_GET_VALUES or self.recType == FCGI_PARAMS:
            self.values = {}
            pos = 0
            while pos < len(c):
                name, value, pos = readPair(c, pos)
                self.values[name] = value

        elif self.recType == FCGI_END_REQUEST:
            b = map(ord, c[0:4])
            self.appStatus = (b[0]<<24) + (b[1]<<16) + (b[2]<<8) + b[3]
            self.protocolStatus = ord(c[4])

    #----------------------------------------
    def writeRecord(self, sock):
        """Encode this record and write all of it to sock.

        For the record types listed in the class docstring the content is
        rebuilt from the type-specific attributes; for every other type
        self.content is sent unchanged.

        Raises ValueError when the content does not fit the 16-bit length
        field of the header.
        """
        content = self.content
        if self.recType == FCGI_BEGIN_REQUEST:
            content = chr(self.role>>8) + chr(self.role & 255) + chr(self.flags) + 5*'\000'

        elif self.recType == FCGI_UNKNOWN_TYPE:
            content = chr(self.unknownType) + 7*'\000'

        elif self.recType == FCGI_GET_VALUES or self.recType == FCGI_PARAMS:
            content = ""
            for i in self.values.keys():
                content = content + writePair(i, self.values[i])

        elif self.recType == FCGI_END_REQUEST:
            v = self.appStatus
            content = chr((v>>24)&255) + chr((v>>16)&255) + chr((v>>8)&255) + chr(v&255)
            content = content + chr(self.protocolStatus) + 3*'\000'

        cLen = len(content)
        if cLen > 0xFFFF:
            raise ValueError('FastCGI record content exceeds 65535 bytes')
        # Pad to an 8-byte boundary.  Computed from the remainder so the
        # result stays in 0..7 even for lengths close to 65535.
        padLen = (8 - cLen % 8) % 8

        hdr = [ self.version,
                self.recType,
                self.reqId >> 8,
                self.reqId & 255,
                cLen >> 8,
                cLen & 255,
                padLen,
                0]
        hdr = string.joinfields(map(chr, hdr), '')

        # send() may accept only part of the buffer; loop until all is out.
        data = hdr + content + padLen*'\000'
        while data:
            sent = sock.send(data)
            data = data[sent:]
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
def _readLength(s, pos):
    """Decode one length field of s starting at pos.

    A first byte without the high bit is the length itself; with the high
    bit set, its low 7 bits and the following three bytes form a 31-bit
    length, high byte first.  Returns (length, position after the field).
    """
    length = ord(s[pos])
    pos = pos + 1
    if length & 128:
        length = (((length & 127) << 24) + (ord(s[pos]) << 16)
                  + (ord(s[pos + 1]) << 8) + ord(s[pos + 2]))
        pos = pos + 3
    return length, pos


def readPair(s, pos):
    """Decode the name-value pair of s that starts at pos.

    The pair consists of the name length, the value length, the name and
    the value, in that order.  Returns (name, value, position just past
    the pair).
    """
    nameLen, pos = _readLength(s, pos)
    valueLen, pos = _readLength(s, pos)
    nameEnd = pos + nameLen
    valueEnd = nameEnd + valueLen
    return (s[pos:nameEnd], s[nameEnd:valueEnd], valueEnd)
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
def _writeLength(l):
    """Encode the length l as a length field.

    Lengths below 128 take a single byte; larger ones take four bytes,
    high byte first, with the top bit of the first byte set.
    """
    if l < 128:
        return chr(l)
    return (chr(128 | ((l >> 24) & 255)) + chr((l >> 16) & 255)
            + chr((l >> 8) & 255) + chr(l & 255))


def writePair(name, value):
    """Encode name and value as one name-value pair.

    Returns the two length fields followed by the name and the value.
    """
    return _writeLength(len(name)) + _writeLength(len(value)) + name + value
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
def HandleManTypes(r, conn):
    """Answer the management record r on the connection conn.

    Only FCGI_GET_VALUES is handled; any other record type is ignored.
    The reply is an FCGI_GET_VALUES_RESULT record that holds those of the
    requested variables this module knows about.

    r is modified in place and reused as the reply record.
    """
    if r.recType != FCGI_GET_VALUES:
        return

    supported = {'FCGI_MAX_CONNS' : FCGI_MAX_CONNS,
                 'FCGI_MAX_REQS'  : FCGI_MAX_REQS,
                 'FCGI_MPXS_CONNS': FCGI_MPXS_CONNS}

    # Report only what was asked for.  Values must be strings because
    # writePair() takes len() of them.
    answer = {}
    for name in r.values.keys():
        if supported.has_key(name):
            answer[name] = str(supported[name])

    r.recType = FCGI_GET_VALUES_RESULT
    r.values = answer

    # writeRecord() sends self.content unchanged for this record type, so
    # the pairs have to be encoded here; otherwise the content of the
    # request would be echoed back.
    content = ""
    for name in answer.keys():
        content = content + writePair(name, answer[name])
    r.content = content
    r.writeRecord(conn)
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Assume FastCGI until we find out for sure; _startup() resets this to 0
# when it decides the process is not running under FastCGI.
_isFCGI = 1

def isFCGI():
    """Return the module's current FastCGI flag (1 or 0)."""
    return _isFCGI
|
||||
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
|
||||
_init = None
|
||||
_sock = None
|
||||
|
||||
class FCGI:
|
||||
def __init__(self):
|
||||
self.haveFinished = 0
|
||||
if _init == None:
|
||||
_startup()
|
||||
if not isFCGI():
|
||||
self.haveFinished = 1
|
||||
self.inp, self.out, self.err, self.env = \
|
||||
sys.stdin, sys.stdout, sys.stderr, os.environ
|
||||
return
|
||||
|
||||
if os.environ.has_key('FCGI_WEB_SERVER_ADDRS'):
|
||||
good_addrs=map(string.strip,
|
||||
string.split(os.environ['FCGI_WEB_SERVER_ADDRS'], ','))
|
||||
else:
|
||||
good_addrs=None
|
||||
|
||||
self.conn, addr=_sock.accept()
|
||||
stdin = data = ""
|
||||
self.env = {}
|
||||
self.requestId=0
|
||||
remaining=1
|
||||
|
||||
# Check if the connection is from a legal address
|
||||
if good_addrs!=None and addr not in good_addrs:
|
||||
raise error, 'Connection from invalid server!'
|
||||
|
||||
while remaining:
|
||||
r=record(); r.readRecord(self.conn)
|
||||
|
||||
if r.recType in ManagementTypes:
|
||||
HandleManTypes(r, self.conn)
|
||||
|
||||
elif r.reqId==0:
|
||||
# Oh, poopy. It's a management record of an unknown
|
||||
# type. Signal the error.
|
||||
r2=record()
|
||||
r2.recType=FCGI_UNKNOWN_TYPE ; r2.unknownType=r.recType
|
||||
r2.writeRecord(self.conn)
|
||||
continue # Charge onwards
|
||||
|
||||
# Ignore requests that aren't active
|
||||
elif r.reqId != self.requestId and r.recType != FCGI_BEGIN_REQUEST:
|
||||
continue
|
||||
|
||||
# If we're already doing a request, ignore further BEGIN_REQUESTs
|
||||
elif r.recType == FCGI_BEGIN_REQUEST and self.requestId != 0:
|
||||
continue
|
||||
|
||||
# Begin a new request
|
||||
if r.recType == FCGI_BEGIN_REQUEST:
|
||||
self.requestId = r.reqId
|
||||
if r.role == FCGI_AUTHORIZER: remaining=1
|
||||
elif r.role == FCGI_RESPONDER: remaining=2
|
||||
elif r.role == FCGI_FILTER: remaining=3
|
||||
|
||||
elif r.recType == FCGI_PARAMS:
|
||||
if r.content == "":
|
||||
remaining=remaining-1
|
||||
else:
|
||||
for i in r.values.keys():
|
||||
self.env[i] = r.values[i]
|
||||
|
||||
elif r.recType == FCGI_STDIN:
|
||||
if r.content == "":
|
||||
remaining=remaining-1
|
||||
else:
|
||||
stdin=stdin+r.content
|
||||
|
||||
elif r.recType==FCGI_DATA:
|
||||
if r.content == "":
|
||||
remaining=remaining-1
|
||||
else:
|
||||
data=data+r.content
|
||||
# end of while remaining:
|
||||
|
||||
self.inp = sys.stdin = StringIO(stdin)
|
||||
self.err = sys.stderr = StringIO()
|
||||
self.out = sys.stdout = StringIO()
|
||||
self.data = StringIO(data)
|
||||
|
||||
def __del__(self):
|
||||
self.Finish()
|
||||
|
||||
def Finish(self, status=0):
|
||||
if not self.haveFinished:
|
||||
self.haveFinished = 1
|
||||
|
||||
self.err.seek(0,0)
|
||||
self.out.seek(0,0)
|
||||
|
||||
r=record()
|
||||
r.recType = FCGI_STDERR
|
||||
r.reqId = self.requestId
|
||||
data = self.err.read()
|
||||
while data:
|
||||
chunk, data = self.getNextChunk(data)
|
||||
r.content = chunk
|
||||
r.writeRecord(self.conn)
|
||||
r.content="" ; r.writeRecord(self.conn) # Terminate stream
|
||||
|
||||
r.recType = FCGI_STDOUT
|
||||
data = self.out.read()
|
||||
while data:
|
||||
chunk, data = self.getNextChunk(data)
|
||||
r.content = chunk
|
||||
r.writeRecord(self.conn)
|
||||
r.content="" ; r.writeRecord(self.conn) # Terminate stream
|
||||
|
||||
r=record()
|
||||
r.recType=FCGI_END_REQUEST
|
||||
r.reqId=self.requestId
|
||||
r.appStatus=status
|
||||
r.protocolStatus=FCGI_REQUEST_COMPLETE
|
||||
r.writeRecord(self.conn)
|
||||
self.conn.close()
|
||||
|
||||
|
||||
def getFieldStorage(self):
|
||||
method = 'GET'
|
||||
if self.env.has_key('REQUEST_METHOD'):
|
||||
method = string.upper(self.env['REQUEST_METHOD'])
|
||||
if method == 'GET':
|
||||
return cgi.FieldStorage(environ=self.env, keep_blank_values=1)
|
||||
else:
|
||||
return cgi.FieldStorage(fp=self.inp, environ=self.env, keep_blank_values=1)
|
||||
|
||||
def getNextChunk(self, data):
|
||||
chunk = data[:8192]
|
||||
data = data[8192:]
|
||||
return chunk, data
|
||||
|
||||
|
||||
Accept = FCGI # alias for backward compatibility
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
def _startup():
|
||||
global _init
|
||||
_init = 1
|
||||
try:
|
||||
s=socket.fromfd(sys.stdin.fileno(), socket.AF_INET,
|
||||
socket.SOCK_STREAM)
|
||||
s.getpeername()
|
||||
except socket.error, (err, errmsg):
|
||||
if err!=errno.ENOTCONN: # must be a non-fastCGI environment
|
||||
global _isFCGI
|
||||
_isFCGI = 0
|
||||
return
|
||||
|
||||
global _sock
|
||||
_sock = s
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
def _test():
    """Small FastCGI test application.

    Serves requests for as long as isFCGI() is true.  A request with a
    numeric 'size' form field is answered with that many '*' characters;
    any other request gets an HTML page showing the request count, the
    process id, the posted form fields and the request environment.

    Any exception that escapes the loop is written to the file
    'traceback' in the current directory.
    """
    counter = 0
    try:
        while isFCGI():
            req = FCGI()
            counter = counter + 1

            fs = None
            try:
                fs = req.getFieldStorage()
                size = string.atoi(fs['size'].value)
                doc = ['*' * size]
            except:
                # Deliberately broad: whatever went wrong above, fall back
                # to the diagnostic page.
                doc = ['<HTML><HEAD><TITLE>FCGI TestApp</TITLE></HEAD>\n<BODY>\n']
                doc.append('<H2>FCGI TestApp</H2><P>')
                doc.append('<b>request count</b> = %d<br>' % counter)
                doc.append('<b>pid</b> = %s<br>' % os.getpid())
                # fs stays None when getFieldStorage() itself failed.
                if fs is not None and req.env.has_key('CONTENT_LENGTH'):
                    cl = string.atoi(req.env['CONTENT_LENGTH'])
                    doc.append('<br><b>POST data (%s):</b><br><pre>' % cl)
                    keys = fs.keys()
                    keys.sort()
                    for k in keys:
                        val = fs[k]
                        if type(val) == type([]):
                            shown = str(val)
                        else:
                            shown = str(val.value)
                        # Form data comes from the client: escape it
                        # before putting it into the page.
                        doc.append(' <b>%-15s :</b> %s\n'
                                   % (cgi.escape(str(k)), cgi.escape(shown)))
                    doc.append('</pre>')

                doc.append('<P><HR><P><pre>')
                keys = req.env.keys()
                keys.sort()
                for k in keys:
                    # Environment values contain request headers: escape.
                    doc.append('<b>%-20s :</b> %s\n'
                               % (cgi.escape(str(k)), cgi.escape(str(req.env[k]))))
                doc.append('\n</pre><P><HR>\n')
                doc.append('</BODY></HTML>\n')

            doc = string.join(doc, '')
            req.out.write('Content-length: %s\r\n'
                          'Content-type: text/html\r\n'
                          'Cache-Control: no-cache\r\n'
                          '\r\n'
                          % len(doc))
            req.out.write(doc)

            req.Finish()
    except:
        import traceback
        f = open('traceback', 'w')
        try:
            traceback.print_exc( file = f )
        finally:
            f.close()
|
||||
|
||||
# Run the test application when this module is executed as a script.
if __name__ == '__main__':
    # To debug, run it under pdb instead:
    #   import pdb
    #   pdb.run('_test()')
    _test()
|
||||
59
lc.cgi
59
lc.cgi
|
|
@ -5,17 +5,16 @@ import re,cgi,sys,urlparse,time,os
|
|||
# configuration
|
||||
sys.stderr = sys.stdout
|
||||
cgi_dir = "/home/calvin/public_html/cgi-bin"
|
||||
dist_dir = "/home/calvin/linkchecker-1.1.0"
|
||||
lc = pylice_dir + "/pylice"
|
||||
dist_dir = "/home/calvin/projects/linkchecker"
|
||||
sys.path.insert(0,dist_dir)
|
||||
cgi.logfile = cgi_dir + "/lc.log"
|
||||
cgi.logfile = cgi_dir + "/linkchecker.log" # must be an existing file
|
||||
# end configuration
|
||||
|
||||
def testit():
|
||||
cgi.test()
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
def checkform():
|
||||
def checkform(form):
|
||||
for key in ["level","url"]:
|
||||
if not form.has_key(key) or form[key].value == "": return 0
|
||||
if not re.match(r"^http://[-\w./~]+$", form["url"].value): return 0
|
||||
|
|
@ -29,27 +28,22 @@ def checkform():
|
|||
if not form["intern"].value=="on": return 0
|
||||
return 1
|
||||
|
||||
|
||||
def getHostName():
|
||||
return urlparse.urlparse(form["url"].value)[1]
|
||||
|
||||
|
||||
def logit():
|
||||
logfile = open("/home/calvin/log/linkchecker.log","a")
|
||||
logfile.write("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))+"\n")
|
||||
def logit(form):
|
||||
cgi.log("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time())))
|
||||
for var in ["HTTP_USER_AGENT","REMOTE_ADDR","REMOTE_HOST","REMOTE_PORT"]:
|
||||
if os.environ.has_key(var):
|
||||
logfile.write(var+"="+os.environ[var]+"\n")
|
||||
cgi.log(var+"="+os.environ[var])
|
||||
for key in ["level","url","anchors","errors","intern"]:
|
||||
if form.has_key(key):
|
||||
logfile.write(str(form[key])+"\n")
|
||||
logfile.close()
|
||||
cgi.log(str(form[key]))
|
||||
|
||||
|
||||
def printError():
|
||||
print """<html><head></head>
|
||||
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
|
||||
alink="#191c83" >
|
||||
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
|
||||
alink="#191c83">
|
||||
<blockquote>
|
||||
<b>Error</b><br>
|
||||
The LinkChecker Online script has encountered an error. Please ensure
|
||||
|
|
@ -60,28 +54,31 @@ Errors are logged.
|
|||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
import linkcheck
|
||||
|
||||
# main
|
||||
print "Content-type: text/html"
|
||||
print "Cache-Control: no-cache"
|
||||
print
|
||||
#testit()
|
||||
form = cgi.FieldStorage()
|
||||
if not checkform():
|
||||
logit()
|
||||
if not checkform(form):
|
||||
logit(form)
|
||||
printError()
|
||||
sys.exit(0)
|
||||
args=["", "-H", "-r "+form["level"].value, "-s"]
|
||||
if form.has_key("anchors"):
|
||||
args.append("-a")
|
||||
if not form.has_key("errors"):
|
||||
args.append("-v")
|
||||
config = linkcheck.Config.Configuration()
|
||||
config["recursionlevel"] = int(form["level"].value)
|
||||
config["log"] = linkcheck.Logging.HtmlLogger()
|
||||
if form.has_key("anchors"): config["anchors"] = 1
|
||||
if not form.has_key("errors"): config["verbose"] = 1
|
||||
if form.has_key("intern"):
|
||||
args.append("--intern=^(ftp|http)://"+getHostName())
|
||||
config["internlinks"].append(re.compile("^(ftp|https?)://"+getHostName()))
|
||||
else:
|
||||
args.append("--extern=^file:")
|
||||
args.append("--intern=.+")
|
||||
config["internlinks"].append(re.compile(".+"))
|
||||
# avoid checking of local files
|
||||
config["externlinks"].append((re.compile("^file:"), 1))
|
||||
|
||||
args.append(form["url"].value)
|
||||
sys.argv = args
|
||||
execfile(lc)
|
||||
|
||||
# start checking
|
||||
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(form["url"].value, 0))
|
||||
linkcheck.checkUrls(config)
|
||||
|
|
|
|||
|
|
@ -308,7 +308,13 @@ class Configuration(UserDict.UserDict):
|
|||
self.data["authentication"].append((re.compile(".*"), "anonymous", "guest@"))
|
||||
|
||||
section = "filtering"
|
||||
try: self.data["externlinks"].append(re.compile(cfgparser.get(section, "externlinks")))
|
||||
try:
|
||||
i=1
|
||||
while 1:
|
||||
tuple = string.split(cfgparser.get(section, "extern"+`i`))
|
||||
if len(tuple)!=2: break
|
||||
self.data["externlinks"].append((re.compile(tuple[0]),
|
||||
int(tuple[1])))
|
||||
except: pass
|
||||
try: self.data["internlinks"].append(re.compile(cfgparser.get(section, "internlinks")))
|
||||
except: pass
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ class UrlData:
|
|||
return
|
||||
try:
|
||||
self.buildUrl()
|
||||
self.extern = self._isExtern(config)
|
||||
self.extern = self._getExtern(config)
|
||||
except:
|
||||
type, value = sys.exc_info()[:2]
|
||||
self.setError(str(value))
|
||||
|
|
@ -114,7 +114,7 @@ class UrlData:
|
|||
|
||||
# apply filter
|
||||
Config.debug("DEBUG: checking filter\n")
|
||||
if config["strict"] and self.extern:
|
||||
if self.extern and (config["strict"] or self.extern[1]):
|
||||
self.setWarning("outside of domain filter, checked only syntax")
|
||||
self.logMe(config)
|
||||
return
|
||||
|
|
@ -177,8 +177,8 @@ class UrlData:
|
|||
return
|
||||
self.setWarning("anchor #"+anchor+" not found")
|
||||
|
||||
def _isExtern(self, config):
|
||||
if len(config["externlinks"])==0 and len(config["internlinks"])==0:
|
||||
def _getExtern(self, config):
|
||||
if not (config["externlinks"] or config["internlinks"]):
|
||||
return 0
|
||||
# deny and allow external checking
|
||||
Config.debug(self.url)
|
||||
|
|
@ -186,17 +186,17 @@ class UrlData:
|
|||
for pat in config["internlinks"]:
|
||||
if pat.search(self.url):
|
||||
return 0
|
||||
for pat in config["externlinks"]:
|
||||
for pat, strict in config["externlinks"]:
|
||||
if pat.search(self.url):
|
||||
return 1
|
||||
return (1, strict)
|
||||
else:
|
||||
for pat in config["externlinks"]:
|
||||
for pat, strict in config["externlinks"]:
|
||||
if pat.search(self.url):
|
||||
return 1
|
||||
return (1, strict)
|
||||
for pat in config["internlinks"]:
|
||||
if pat.search(self.url):
|
||||
return 0
|
||||
return 1
|
||||
return (1,0)
|
||||
|
||||
def getContent(self):
|
||||
"""Precondition: urlConnection is an opened URL.
|
||||
|
|
|
|||
18
linkchecker
18
linkchecker
|
|
@ -42,7 +42,7 @@ OPTIONS
|
|||
Default is no file output.
|
||||
-p pwd, --password=pwd
|
||||
Try given password for HTML and FTP authorization.
|
||||
Default is 'joe@'. See -u.
|
||||
Default is 'guest@'. See -u.
|
||||
-P host[:port], --proxy=host[:port]
|
||||
Use specified proxy for HTTP requests.
|
||||
Standard port is 8080. Default is to use no proxy.
|
||||
|
|
@ -78,8 +78,9 @@ o If you have your system configured to automatically establish a
|
|||
connection to the internet (e.g. with diald), it will connect when
|
||||
checking links not pointing to your local host.
|
||||
Use the -s and -i options to prevent this (see EXAMPLES).
|
||||
o Javascript and https links are currently ignored
|
||||
o Javascript links are currently ignored
|
||||
o If your platform does not support threading, linkchecker assumes -t0
|
||||
o You can supply multiple user/password pairs in a configuration file
|
||||
"""
|
||||
|
||||
Examples = """EXAMPLES
|
||||
|
|
@ -110,8 +111,9 @@ def printUsage(msg):
|
|||
# Read command line arguments
|
||||
try:
|
||||
# Note: cut out the name of the script
|
||||
options, args = getopt.getopt(sys.argv[1:], "aDe:f:hi:lP:o:p:qr:Rst:u:VvwW:",
|
||||
["anchors",
|
||||
options, args = getopt.getopt(sys.argv[1:],
|
||||
"aDe:f:hi:lP:o:p:qr:Rst:u:VvwW:", # short options
|
||||
["anchors", # long options
|
||||
"config=",
|
||||
"debug",
|
||||
"extern=",
|
||||
|
|
@ -160,7 +162,7 @@ for opt,arg in options:
|
|||
linkcheck.Config.DebugFlag = 1
|
||||
|
||||
elif opt=="-e" or opt=="--extern":
|
||||
config["externlinks"].append(re.compile(arg))
|
||||
config["externlinks"].append((re.compile(arg), 0))
|
||||
|
||||
elif opt=="-h" or opt=="--help":
|
||||
printHelp()
|
||||
|
|
@ -192,11 +194,11 @@ for opt,arg in options:
|
|||
config["proxy"] = arg
|
||||
|
||||
elif opt=="-p" or opt=="--password":
|
||||
_password=arg
|
||||
constructAuth=1
|
||||
_password = arg
|
||||
constructauth = 1
|
||||
|
||||
elif opt=="-q" or opt=="--quiet":
|
||||
config["quiet"]=1
|
||||
config["quiet"] = 1
|
||||
|
||||
elif opt=="-r" or opt=="--recursion-level":
|
||||
if int(arg) >= 0:
|
||||
|
|
|
|||
|
|
@ -18,9 +18,11 @@ strict=0
|
|||
#proxy=www-proxy.uni-sb.de
|
||||
#proxyport=3128
|
||||
|
||||
# for each extern link we can specify if it is strict or not
|
||||
[filtering]
|
||||
externlinks=
|
||||
internlinks=
|
||||
# strict rule (trailing 1): matching links, here local files, get only a syntax check
|
||||
#extern1=^file:.* 1
|
||||
#internlinks=
|
||||
allowdeny=0
|
||||
|
||||
# You can provide different user/password pairs for different link types.
|
||||
|
|
|
|||
60
sz_fcgi.py
Normal file
60
sz_fcgi.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
# sz_fcgi.py - Multithreaded FastCGI Wrapper
|
||||
__version__ = "v0.8 19/10/1998 ajung"
|
||||
__doc__ = "Multithreaded FastCGI Wrapper"
|
||||
|
||||
|
||||
import sys,thread,fcgi
|
||||
|
||||
class SZ_FCGI:
    """Multithreaded FastCGI wrapper: one thread per accepted request.

    func is called in the request's thread as func(wrapper, env, form),
    where wrapper is this object, env the request environment and form
    the result of the request's getFieldStorage().  Inside func, use
    wrapper.pr() to produce output and wrapper.finish() to send the
    response and end the thread.
    """

    # Constructor
    def __init__(self, func):
        self.func = func
        # thread id -> request currently handled by that thread
        self.handles = {}

    # create a new thread to handle requests
    def run(self):
        """Accept requests while fcgi.isFCGI() is true, one thread each."""
        try:
            while fcgi.isFCGI():
                req = fcgi.FCGI()
                thread.start_new_thread(self.handle_request, (req, 0))
        except:
            # The original handler called write_log(), which is not
            # defined in this module; report on stderr instead.
            import traceback
            sys.stderr.write('isCGI() failed\n')
            traceback.print_exc()

    # Finish thread and send all data back to the FCGI parent
    def finish(self):
        """Send the current thread's response and end the thread."""
        ident = thread.get_ident()
        req = self.handles[ident]
        try:
            req.Finish()
        finally:
            # Drop the entry so that the request can be freed and a later
            # thread with the same id starts clean.
            del self.handles[ident]
        thread.exit()

    # Call function - handled by a thread
    def handle_request(self, *args):
        """Thread body: register the request args[0] and run self.func."""
        req = args[0]
        ident = thread.get_ident()
        self.handles[ident] = req

        try:
            try:
                self.func(self, req.env, req.getFieldStorage())
            except SystemExit:
                # Raised by thread.exit() in finish(): normal termination.
                pass
            except:
                # Keep the thread from dying noisily, but record the
                # error in the request's error stream.
                import traceback
                try:
                    traceback.print_exc(file=req.err)
                except:
                    pass
        finally:
            # Still registered means func never called finish(): finish
            # the request here so the client is not left waiting.
            if self.handles.has_key(ident):
                del self.handles[ident]
                req.Finish()

    # Our own FCGI print routine
    def pr(self, *args):
        """Write the str() of all args, then a newline, to the response.

        Output errors are ignored (best effort).
        """
        req = self.handles[thread.get_ident()]

        try:
            s = ''
            for i in args: s = s + str(i)
            req.out.write(s + '\n')
            req.out.flush()
        except:
            pass
|
||||
|
||||
|
||||
Loading…
Reference in a new issue