Initial revision

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@5 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-02-26 10:24:46 +00:00
commit 0329ca7682
100 changed files with 9413 additions and 0 deletions

3
.cvsignore Normal file
View file

@ -0,0 +1,3 @@
build-stamp
sample.html
linkchecker-out.*

148
ChangeLog Normal file
View file

@ -0,0 +1,148 @@
25.2.2000
* changed the name to LinkChecker. My old Java LinkChecker will
disappear because I do not maintain it anymore.
21.2.2000
* add -q, --quiet option
* convert all url host to lowercase
* log the download time for urls
20.2.2000
* add Graph Modelling Language (GML) output for sitemaps
* add SQL output
19.2.2000
* second try with HTTP/1.1: additionally close response
* remove deprecated options
* new option -W, --file-output
* fix typo for --password option
18.2.2000
* add "-" to mail address syntax (Baz <B.Rowlingson@lancaster.ac.uk>)
* fix typo in pylice (David J. MacKenzie <djm@web.us.uu.net>)
10.2.2000 Version 0.8.0
* clean the CVS dir
* fixes for configuration
* first version of configuration parsing
9.2.2000
* do not pass anchor in HTTP requests
* fixes for configuration parsing
8.2.2000
* fixed bad finished_NoThreads function
* backed out HTTP/1.1 support. This library is buggy and
does not close some filehandles. Eventually you will get
a "Too many open files" error
* strip whitespace from parsed urls
6.2.2000
* fixed some bugs, the test suite is running again
5.2.2000
* made "LinkChecker" module
* configuration is dynamic; no more class variables
* print line number
* more aggressive closing of filehandles
27.1.2000 Version 0.7.0
* put pylicerc in /etc for .deb package
* HTTP/1.1 support with httplib.py from Greg Stein
* DNS MX lookup for mail addresses
use the DNS module from Guido van Rossum and Anthony Baxter
MX lookup was a suggestion to LinkChecker from
Jimmy Engelbrecht <jimmy@e.kth.se>
26.1.2000 Version 0.6.2
* refined HTML link syntax to handle non-quoted URLs
* fix: set urlTuple to None if we cannot check anchors
* fixed anchor checking again
25.1.2000 Version 0.6.1
* fixed the HTML link syntax
24.1.2000
* fix: -e option did not work properly
* fix: reenabled LinkChecker Online, updated to 0.6.0
21.1.2000 Version 0.6.0
* fix: add hostname for relative redirections
* Added TODO list
20.1.2000
* Added documentation for the LinkChecker class
19.1.2000
* HTTP Proxy support
* CGI logging
18.1.2000 Version 0.5.0
* anchor checking in local HTML files
* configuration file
* HTTP Authorization support
* Send HTTP HEAD method to check and GET method to get contents
* Still missing: Proxy support (including HTTP status code 305)
17.1.2000
* cut parameter, query and fragment of local file names
* limit number of redirections to 5
14.1.2000 Version 0.4.3
* pylice.bat fix: now it really works
* fix for local Windows file arguments
14.1.2000 Version 0.4.2
* StringUtil.indentWith: use string multiplying
* Still missing: HTTP authorization and Proxy support
* pylice.bat fix: pass parameters
13.1.2000 Version 0.4.1
* Windows python.bat script
* installation updates
* additional .zip package for Windows
12.1.2000 Version 0.4.0
* fixed LinkChecker.NumThreads setting: if the platform
does not support threading, it is disabled automagically
* robots.txt parsing
* split up UrlData.py
* simplified option parsing
* strip optional quotes from urls
* use quit() not close() to disconnect from FTP servers
11.1.2000 Version 0.3.0
* try to finger for mailto: links
* try to connect for telnet: links
* removed time.sleep(1) commands, they are not necessary
* restrict CGI to recursion level 3
* make UrlCache and RobotsTxtCache thread safe
* fixed the 'No more open files' bug by closing all connections
* fixed thread synchronization in LinkChecker while loop
* you can specify -t 0 on the commandline to disable threading
* STILL MISSING:
HTTP authorization, Proxy and robots.txt parsing
10.1.2000 Version 0.2.0
* configure option to disable threading: LinkChecker.threadsupport
* do not rely on self.mime in HttpUrlData, this could be None
* flush stdout after each log entry
* use LinkChecker.User and LinkChecker.Password in FTP connections
* make sure redirection is not cyclic
9.1.2000 Version 0.1.0
* HTTP request
* FTP request
* fixed MaxRecursionLevel setting
* fixed name clash of variable and function warning
* ColoredLogger
* small doc changes
* CGI and HTML files for LinkChecker Online,
but I still have to install Python on my http server
(will try this tomorrow)
8.1.2000
* Properties, Threader, LinkChecker, UrlData, Logging
7.1.2000 Version 0.0.1
* Option processing

215
DNS/Base.py Normal file
View file

@ -0,0 +1,215 @@
# $Id$
import sys
import getopt
import socket
import string
import DNS,DNS.Lib,DNS.Type,DNS.Class,DNS.Opcode
#import asyncore
# Module-wide defaults for DnsRequest arguments; per-request keyword
# arguments override these (see DnsRequest.argparse).
defaults = {
    'protocol': 'udp',
    'port': 53,
    'opcode': DNS.Opcode.QUERY,
    'qtype': DNS.Type.A,
    'rd': 1,
    'timing': 1,
    'server': [],
}
def ParseResolvConf():
    "parses the /etc/resolv.conf file and sets defaults for name servers"
    import string
    global defaults
    lines=open("/etc/resolv.conf").readlines()
    for line in lines:
        # FIX: string.strip returns a new string; the original discarded
        # the result, so the line was never actually stripped.
        line = string.strip(line)
        # FIX: skip blank lines, which would raise IndexError on line[0]
        if not line:
            continue
        if line[0]==';' or line[0]=='#':
            continue
        fields=string.split(line)
        if fields[0]=='domain':
            defaults['domain']=fields[1]
        if fields[0]=='search':
            pass
        if fields[0]=='options':
            pass
        if fields[0]=='sortlist':
            pass
        if fields[0]=='nameserver':
            defaults['server'].append(fields[1])
class DnsRequest:
    """A single DNS query.

    Keyword arguments override the module-level ``defaults`` dict
    (protocol, port, opcode, qtype, rd, server, ...); the positional
    ``name`` argument is the domain name to look up.
    """
    def __init__(self,*name,**args):
        self.donefunc=None
        self.async=None
        self.defaults = {}
        self.argparse(name,args)
        # remember the merged arguments as per-instance defaults for reuse
        self.defaults = self.args
    def argparse(self,name,args):
        """Merge the positional name, keyword args, instance defaults and
        module defaults into self.args."""
        if not name and self.defaults.has_key('name'):
            args['name'] = self.defaults['name']
        if type(name) is type(""):
            args['name']=name
        else:
            if len(name) == 1:
                if name[0]:
                    args['name']=name[0]
        for i in defaults.keys():
            if not args.has_key(i):
                if self.defaults.has_key(i):
                    args[i]=self.defaults[i]
                else:
                    args[i]=defaults[i]
        # a single server name is normalized to a one-element list
        if type(args['server']) == type(''):
            args['server'] = [args['server']]
        self.args=args
    def socketInit(self,a,b):
        # create the query socket (address family a, socket type b)
        import socket
        self.s = socket.socket(a,b)
    def processUDPReply(self):
        """Read one UDP datagram and unpack it into a DnsResult."""
        import time
        # NOTE(review): replies longer than 1024 bytes are silently truncated
        self.reply = self.s.recv(1024)
        self.time_finish=time.time()
        self.args['server']=self.ns
        return self.processReply()
    def processTCPReply(self):
        """Read one length-prefixed TCP reply and unpack it into a DnsResult."""
        import time
        self.f = self.s.makefile('r')
        # TCP replies carry a 16-bit length prefix (RFC 1035, 4.2.2)
        header = self.f.read(2)
        if len(header) < 2:
            raise DNS.Error,'EOF'
        count = DNS.Lib.unpack16bit(header)
        self.reply = self.f.read(count)
        if len(self.reply) != count:
            raise DNS.Error,'incomplete reply'
        self.time_finish=time.time()
        self.args['server']=self.ns
        return self.processReply()
    def processReply(self):
        """Unpack self.reply into a DnsResult; records elapsed time in ms."""
        import time
        self.args['elapsed']=(self.time_finish-self.time_start)*1000
        u = DNS.Lib.Munpacker(self.reply)
        r=DNS.Lib.DnsResult(u,self.args)
        r.args=self.args
        #self.args=None # mark this DnsRequest object as used.
        return r
        #### TODO TODO TODO ####
        # NOTE(review): everything below is unreachable (placed after the
        # return) and refers to names not bound here (protocol, qtype, f) --
        # a left-over sketch for AXFR zone-transfer support.
        if protocol == 'tcp' and qtype == DNS.Type.AXFR:
            while 1:
                header = f.read(2)
                if len(header) < 2:
                    print '========== EOF =========='
                    break
                count = DNS.Lib.unpack16bit(header)
                if not count:
                    print '========== ZERO COUNT =========='
                    break
                print '========== NEXT =========='
                reply = f.read(count)
                if len(reply) != count:
                    print '*** Incomplete reply ***'
                    break
                u = DNS.Lib.Munpacker(reply)
                DNS.Lib.dumpM(u)
    def conn(self):
        # connect the socket to the current nameserver
        self.s.connect((self.ns,self.port))
    def req(self,*name,**args):
        """Build the query packet and try each configured server in turn
        until one answers; returns the DnsResult (unless async)."""
        import time,sys
        self.argparse(name,args)
        #if not self.args:
        #    raise DNS.Error,'reinitialize request before reuse'
        protocol = self.args['protocol']
        self.port = self.args['port']
        opcode = self.args['opcode']
        rd = self.args['rd']
        server=self.args['server']
        # qtype may be given symbolically ('a', 'mx', ...); resolve it
        # against the DNS.Type constant namespace
        if type(self.args['qtype']) == type('foo'):
            try:
                qtype = eval(string.upper(self.args['qtype']), DNS.Type.__dict__)
            except (NameError,SyntaxError):
                raise DNS.Error,'unknown query type'
        else:
            qtype=self.args['qtype']
        if not self.args.has_key('name'):
            print self.args
            raise DNS.Error,'nothing to lookup'
        qname = self.args['name']
        if qtype == DNS.Type.AXFR:
            print 'Query type AXFR, protocol forced to TCP'
            protocol = 'tcp'
        #print 'QTYPE %d(%s)' % (qtype, DNS.Type.typestr(qtype))
        m = DNS.Lib.Mpacker()
        # one question, recursion-desired flag as configured
        m.addHeader(0,
            0, opcode, 0, 0, rd, 0, 0, 0,
            1, 0, 0, 0)
        m.addQuestion(qname, qtype, DNS.Class.IN)
        self.request = m.getbuf()
        if protocol == 'udp':
            self.response=None
            self.socketInit(socket.AF_INET, socket.SOCK_DGRAM)
            for self.ns in server:
                try:
                    #self.s.connect((self.ns, self.port))
                    self.conn()
                    self.time_start=time.time()
                    if not self.async:
                        self.s.send(self.request)
                        self.response=self.processUDPReply()
                #except socket.error:
                # NOTE(review): 'except None' matches no exception, so
                # socket errors propagate instead of trying the next server
                except None:
                    continue
                break
            if not self.response:
                if not self.async:
                    raise DNS.Error,'no working nameservers found'
        else:
            self.response=None
            for self.ns in server:
                try:
                    self.socketInit(socket.AF_INET, socket.SOCK_STREAM)
                    self.time_start=time.time()
                    self.conn()
                    # length-prefixed request (RFC 1035, 4.2.2)
                    self.s.send(DNS.Lib.pack16bit(len(self.request)) + self.request)
                    self.s.shutdown(1)
                    self.response=self.processTCPReply()
                except socket.error:
                    continue
                break
            if not self.response:
                raise DNS.Error,'no working nameservers found'
        if not self.async:
            return self.response
#class DnsAsyncRequest(DnsRequest,asyncore.dispatcher_with_send):
class DnsAsyncRequest(DnsRequest):
    """Asynchronous variant of DnsRequest driven by the asyncore loop.

    NOTE(review): as written this cannot work -- realinit() is not defined
    anywhere in this file and the 'import asyncore' at the top is commented
    out; it presumably tracks the commented-out base class above.
    """
    def __init__(self,*name,**args):
        # a 'done' keyword argument installs a completion callback
        if args.has_key('done') and args['done']:
            self.donefunc=args['done']
        else:
            self.donefunc=self.showResult
        self.realinit(name,args)
        self.async=1
    def conn(self):
        import time
        self.connect(self.ns,self.port)
        self.time_start=time.time()
        # 'start' keyword kicks off the select loop immediately
        if self.args.has_key('start') and self.args['start']:
            asyncore.dispatcher.go(self)
    def socketInit(self,a,b):
        self.create_socket(a,b)
        asyncore.dispatcher.__init__(self)
        # alias so DnsRequest methods that use self.s keep working
        self.s=self
    def handle_read(self):
        # a UDP reply is complete as soon as the datagram arrives
        if self.args['protocol'] == 'udp':
            self.response=self.processUDPReply()
            if self.donefunc:
                apply(self.donefunc,(self,))
    def handle_connect(self):
        self.send(self.request)
    def handle_write(self):
        pass
    def showResult(self,*s):
        # default completion callback: dump the answer to stdout
        self.response.show()

23
DNS/Class.py Normal file
View file

@ -0,0 +1,23 @@
# CLASS values (section 3.2.4)
IN = 1          # the Internet
CS = 2          # the CSNET class (Obsolete - used only for examples in
                # some obsolete RFCs)
CH = 3          # the CHAOS class
HS = 4          # Hesiod [Dyer 87]
# QCLASS values (section 3.2.5)
ANY = 255       # any class
# Construct reverse mapping dictionary
# dir() at this point lists every name bound so far; all public names are
# CLASS constants, so eval(_name) recovers each numeric value.
_names = dir()
classmap = {}
for _name in _names:
    if _name[0] != '_': classmap[eval(_name)] = _name
def classstr(klass):
    # symbolic name for a CLASS value; falls back to repr for unknown codes
    if classmap.has_key(klass): return classmap[klass]
    else: return `klass`

589
DNS/Lib.py Normal file
View file

@ -0,0 +1,589 @@
# Domain Name Server (DNS) interface
#
# See RFC 1035:
# ------------------------------------------------------------------------
# Network Working Group P. Mockapetris
# Request for Comments: 1035 ISI
# November 1987
# Obsoletes: RFCs 882, 883, 973
#
# DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
# ------------------------------------------------------------------------
import string
import DNS.Type
import DNS.Class
import DNS.Opcode
import DNS.Status
# Low-level 16 and 32 bit integer packing and unpacking
def pack16bit(n):
    """Pack an unsigned 16-bit value into two big-endian characters."""
    hi = (n >> 8) & 0xFF
    lo = n & 0xFF
    return chr(hi) + chr(lo)
def pack32bit(n):
    """Pack an unsigned 32-bit value into four big-endian characters."""
    result = ''
    for shift in (24, 16, 8, 0):
        result = result + chr((n >> shift) & 0xFF)
    return result
def unpack16bit(s):
    """Decode the first two characters of s as a big-endian 16-bit value."""
    hi = ord(s[0])
    lo = ord(s[1])
    return (hi << 8) | lo
def unpack32bit(s):
    """Decode the first four characters of s as a big-endian 32-bit value."""
    b0, b1, b2, b3 = ord(s[0]), ord(s[1]), ord(s[2]), ord(s[3])
    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
def addr2bin(addr):
    """Convert a dotted-quad IP address string to a 32-bit integer;
    integers are passed through unchanged."""
    if type(addr) == type(0):
        return addr
    bytes = string.splitfields(addr, '.')
    if len(bytes) != 4: raise ValueError, 'bad IP address'
    n = 0
    # fold each octet into the accumulator, most significant first
    for byte in bytes: n = n<<8 | string.atoi(byte)
    return n
def bin2addr(n):
    """Render a 32-bit integer as a dotted-quad IP address string."""
    octets = ((n >> 24) & 0xFF, (n >> 16) & 0xFF, (n >> 8) & 0xFF, n & 0xFF)
    return '%d.%d.%d.%d' % octets
# Packing class
class Packer:
def __init__(self):
self.buf = ''
self.index = {}
def getbuf(self):
return self.buf
def addbyte(self, c):
if len(c) != 1: raise TypeError, 'one character expected'
self.buf = self.buf + c
def addbytes(self, bytes):
self.buf = self.buf + bytes
def add16bit(self, n):
self.buf = self.buf + pack16bit(n)
def add32bit(self, n):
self.buf = self.buf + pack32bit(n)
def addaddr(self, addr):
n = addr2bin(addr)
self.buf = self.buf + pack32bit(n)
def addstring(self, s):
self.addbyte(chr(len(s)))
self.addbytes(s)
def addname(self, name):
# Domain name packing (section 4.1.4)
# Add a domain name to the buffer, possibly using pointers.
# The case of the first occurrence of a name is preserved.
# Redundant dots are ignored.
list = []
for label in string.splitfields(name, '.'):
if label:
if len(label) > 63:
raise PackError, 'label too long'
list.append(label)
keys = []
for i in range(len(list)):
key = string.upper(string.joinfields(list[i:], '.'))
keys.append(key)
if self.index.has_key(key):
pointer = self.index[key]
break
else:
i = len(list)
pointer = None
# Do it into temporaries first so exceptions don't
# mess up self.index and self.buf
buf = ''
offset = len(self.buf)
index = []
for j in range(i):
label = list[j]
n = len(label)
if offset + len(buf) < 0x3FFF:
index.append(keys[j], offset + len(buf))
else:
print 'DNS.Lib.Packer.addname:',
print 'warning: pointer too big'
buf = buf + (chr(n) + label)
if pointer:
buf = buf + pack16bit(pointer | 0xC000)
else:
buf = buf + '\0'
self.buf = self.buf + buf
for key, value in index:
self.index[key] = value
def dump(self):
keys = self.index.keys()
keys.sort()
print '-'*40
for key in keys:
print '%20s %3d' % (key, self.index[key])
print '-'*40
space = 1
for i in range(0, len(self.buf)+1, 2):
if self.buf[i:i+2] == '**':
if not space: print
space = 1
continue
space = 0
print '%4d' % i,
for c in self.buf[i:i+2]:
if ' ' < c < '\177':
print ' %c' % c,
else:
print '%2d' % ord(c),
print
print '-'*40
# Unpacking class
UnpackError = 'DNS.Lib.UnpackError' # Exception (string-exception style)
class Unpacker:
    """Sequential reader over a packed DNS message buffer."""
    def __init__(self, buf):
        self.buf = buf
        self.offset = 0
    def getbyte(self):
        c = self.buf[self.offset]
        self.offset = self.offset + 1
        return c
    def getbytes(self, n):
        s = self.buf[self.offset : self.offset + n]
        if len(s) != n: raise UnpackError, 'not enough data left'
        self.offset = self.offset + n
        return s
    def get16bit(self):
        return unpack16bit(self.getbytes(2))
    def get32bit(self):
        return unpack32bit(self.getbytes(4))
    def getaddr(self):
        return bin2addr(self.get32bit())
    def getstring(self):
        # length-prefixed character string (one length byte)
        return self.getbytes(ord(self.getbyte()))
    def getname(self):
        # Domain name unpacking (section 4.1.4)
        c = self.getbyte()
        i = ord(c)
        # top two bits set marks a 14-bit compression pointer
        if i & 0xC0 == 0xC0:
            d = self.getbyte()
            j = ord(d)
            pointer = ((i<<8) | j) & ~0xC000
            save_offset = self.offset
            try:
                # follow the pointer, then restore our position
                self.offset = pointer
                domain = self.getname()
            finally:
                self.offset = save_offset
            return domain
        if i == 0:
            # a zero length byte terminates the name
            return ''
        domain = self.getbytes(i)
        remains = self.getname()
        if not remains:
            return domain
        else:
            return domain + '.' + remains
# Test program for packing/unpacking (section 4.1.4)
def testpacker():
    """Benchmark and sanity-check Packer/Unpacker name compression.

    NOTE(review): relies on the old SGI-only 'timing' module.
    """
    N = 25
    R = range(N)
    import timing
    # See section 4.1.4 of RFC 1035
    timing.start()
    for i in R:
        p = Packer()
        p.addbytes('*' * 20)
        p.addname('f.ISI.ARPA')
        p.addbytes('*' * 8)
        p.addname('Foo.F.isi.arpa')
        p.addbytes('*' * 18)
        p.addname('arpa')
        p.addbytes('*' * 26)
        p.addname('')
    timing.finish()
    print round(timing.milli() * 0.001 / N, 3), 'seconds per packing'
    p.dump()
    # walk the packed buffer once to show the unpacked names
    u = Unpacker(p.buf)
    u.getbytes(20)
    u.getname()
    u.getbytes(8)
    u.getname()
    u.getbytes(18)
    u.getname()
    u.getbytes(26)
    u.getname()
    timing.start()
    for i in R:
        u = Unpacker(p.buf)
        res = (u.getbytes(20),
               u.getname(),
               u.getbytes(8),
               u.getname(),
               u.getbytes(18),
               u.getname(),
               u.getbytes(26),
               u.getname())
    timing.finish()
    print round(timing.milli() * 0.001 / N, 3), 'seconds per unpacking'
    for item in res: print item
# Pack/unpack RR toplevel format (section 3.2.1)
class RRpacker(Packer):
def __init__(self):
Packer.__init__(self)
self.rdstart = None
def addRRheader(self, name, type, klass, ttl, *rest):
self.addname(name)
self.add16bit(type)
self.add16bit(klass)
self.add32bit(ttl)
if rest:
if res[1:]: raise TypeError, 'too many args'
rdlength = rest[0]
else:
rdlength = 0
self.add16bit(rdlength)
self.rdstart = len(self.buf)
def patchrdlength(self):
rdlength = unpack16bit(self.buf[self.rdstart-2:self.rdstart])
if rdlength == len(self.buf) - self.rdstart:
return
rdata = self.buf[self.rdstart:]
save_buf = self.buf
ok = 0
try:
self.buf = self.buf[:self.rdstart-2]
self.add16bit(len(rdata))
self.buf = self.buf + rdata
ok = 1
finally:
if not ok: self.buf = save_buf
def endRR(self):
if self.rdstart is not None:
self.patchrdlength()
self.rdstart = None
def getbuf(self):
if self.rdstart is not None: self.patchrdlenth()
return Packer.getbuf(self)
# Standard RRs (section 3.3)
def addCNAME(self, name, klass, ttl, cname):
self.addRRheader(name, DNS.Type.CNAME, klass, ttl)
self.addname(cname)
self.endRR()
def addHINFO(self, name, klass, ttl, cpu, os):
self.addRRheader(name, DNS.Type.HINFO, klass, ttl)
self.addstring(cpu)
self.addstring(os)
self.endRR()
def addMX(self, name, klass, ttl, preference, exchange):
self.addRRheader(name, DNS.Type.MX, klass, ttl)
self.add16bit(preference)
self.addname(exchange)
self.endRR()
def addNS(self, name, klass, ttl, nsdname):
self.addRRheader(name, DNS.Type.NS, klass, ttl)
self.addname(nsdname)
self.endRR()
def addPTR(self, name, klass, ttl, ptrdname):
self.addRRheader(name, DNS.Type.PTR, klass, ttl)
self.addname(ptrdname)
self.endRR()
def addSOA(self, name, klass, ttl,
mname, rname, serial, refresh, retry, expire, minimum):
self.addRRheader(name, DNS.Type.SOA, klass, ttl)
self.addname(mname)
self.addname(rname)
self.add32bit(serial)
self.add32bit(refresh)
self.add32bit(retry)
self.add32bit(expire)
self.add32bit(minimum)
self.endRR()
def addTXT(self, name, klass, ttl, list):
self.addRRheader(name, DNS.Type.TXT, klass, ttl)
for txtdata in list:
self.addstring(txtdata)
self.endRR()
# Internet specific RRs (section 3.4) -- class = IN
def addA(self, name, ttl, address):
self.addRRheader(name, DNS.Type.A, DNS.Class.IN, ttl)
self.addaddr(address)
self.endRR()
def addWKS(self, name, ttl, address, protocol, bitmap):
self.addRRheader(name, DNS.Type.WKS, DNS.Class.IN, ttl)
self.addaddr(address)
self.addbyte(chr(protocol))
self.addbytes(bitmap)
self.endRR()
def prettyTime(seconds):
    """Return (seconds, human-readable interval string), using the largest
    unit (seconds, minutes, hours, days, weeks) that fits."""
    table = ((60, 1, 'seconds'),
             (3600, 60, 'minutes'),
             (86400, 3600, 'hours'),
             (604800, 86400, 'days'))
    for limit, unitsize, unitname in table:
        if seconds < limit:
            return seconds, "%d %s" % (seconds / unitsize, unitname)
    return seconds, "%d weeks" % (seconds / 604800)
class RRunpacker(Unpacker):
    """Unpacker with helpers for resource records; getRRheader() notes
    where the RDATA ends so endRR() can verify full consumption."""
    def __init__(self, buf):
        Unpacker.__init__(self, buf)
        self.rdend = None
    def getRRheader(self):
        """Return (name, type, class, ttl, rdlength) and record RDATA end."""
        name = self.getname()
        type = self.get16bit()
        klass = self.get16bit()
        ttl = self.get32bit()
        rdlength = self.get16bit()
        self.rdend = self.offset + rdlength
        return (name, type, klass, ttl, rdlength)
    def endRR(self):
        if self.offset != self.rdend:
            raise UnpackError, 'end of RR not reached'
    def getCNAMEdata(self):
        return self.getname()
    def getHINFOdata(self):
        # (cpu, os)
        return self.getstring(), self.getstring()
    def getMXdata(self):
        # (preference, exchange)
        return self.get16bit(), self.getname()
    def getNSdata(self):
        return self.getname()
    def getPTRdata(self):
        return self.getname()
    def getSOAdata(self):
        # labelled tuples; interval fields are annotated via prettyTime()
        return self.getname(), \
               self.getname(), \
               ('serial',)+(self.get32bit(),), \
               ('refresh ',)+prettyTime(self.get32bit()), \
               ('retry',)+prettyTime(self.get32bit()), \
               ('expire',)+prettyTime(self.get32bit()), \
               ('minimum',)+prettyTime(self.get32bit())
    def getTXTdata(self):
        # TXT RDATA is a sequence of length-prefixed character strings
        list = []
        while self.offset != self.rdend:
            list.append(self.getstring())
        return list
    def getAdata(self):
        return self.getaddr()
    def getWKSdata(self):
        address = self.getaddr()
        protocol = ord(self.getbyte())
        # the rest of the RDATA is the service bitmap
        bitmap = self.getbytes(self.rdend - self.offset)
        return address, protocol, bitmap
# Pack/unpack Message Header (section 4.1)
class Hpacker(Packer):
    """Packer for the 12-byte DNS message header (RFC 1035, 4.1.1)."""
    def addHeader(self, id, qr, opcode, aa, tc, rd, ra, z, rcode,
                  qdcount, ancount, nscount, arcount):
        self.add16bit(id)
        # flags word layout: QR(1) OPCODE(4) AA TC RD RA Z(3) RCODE(4)
        # FIX: opcode was masked with '*0xF' instead of '&0xF', which
        # corrupts the flags word for any non-zero opcode.
        self.add16bit((qr&1)<<15 | (opcode&0xF)<<11 | (aa&1)<<10
                      | (tc&1)<<9 | (rd&1)<<8 | (ra&1)<<7
                      | (z&7)<<4 | (rcode&0xF))
        self.add16bit(qdcount)
        self.add16bit(ancount)
        self.add16bit(nscount)
        self.add16bit(arcount)
class Hunpacker(Unpacker):
    def getHeader(self):
        """Decode the 12-byte message header: id, the eight flag/code
        fields, and the four section counts (RFC 1035, 4.1.1)."""
        id = self.get16bit()
        flags = self.get16bit()
        # shift/mask out each field of the flags word
        qr, opcode, aa, tc, rd, ra, z, rcode = (
            (flags>>15)&1,
            (flags>>11)&0xF,
            (flags>>10)&1,
            (flags>>9)&1,
            (flags>>8)&1,
            (flags>>7)&1,
            (flags>>4)&7,
            (flags>>0)&0xF)
        qdcount = self.get16bit()
        ancount = self.get16bit()
        nscount = self.get16bit()
        arcount = self.get16bit()
        return (id, qr, opcode, aa, tc, rd, ra, z, rcode,
                qdcount, ancount, nscount, arcount)
# Pack/unpack Question (section 4.1.2)
class Qpacker(Packer):
    def addQuestion(self, qname, qtype, qclass):
        """Append one question entry: QNAME, QTYPE, QCLASS."""
        self.addname(qname)
        self.add16bit(qtype)
        self.add16bit(qclass)
class Qunpacker(Unpacker):
    def getQuestion(self):
        """Return the next question entry as (qname, qtype, qclass)."""
        return self.getname(), self.get16bit(), self.get16bit()
# Pack/unpack Message(section 4)
# NB the order of the base classes is important for __init__()!
class Mpacker(RRpacker, Qpacker, Hpacker):
    # whole-message packer; RRpacker comes first so its __init__ runs
    pass
class Munpacker(RRunpacker, Qunpacker, Hunpacker):
    # whole-message unpacker; RRunpacker.__init__ sets up RDATA tracking
    pass
# Routines to print an unpacker to stdout, for debugging.
# These affect the unpacker's current position!
def dumpM(u):
    """Print an entire message from unpacker u: header then each section."""
    print 'HEADER:',
    (id, qr, opcode, aa, tc, rd, ra, z, rcode,
     qdcount, ancount, nscount, arcount) = u.getHeader()
    print 'id=%d,' % id,
    print 'qr=%d, opcode=%d, aa=%d, tc=%d, rd=%d, ra=%d, z=%d, rcode=%d,' \
          % (qr, opcode, aa, tc, rd, ra, z, rcode)
    if tc: print '*** response truncated! ***'
    if rcode: print '*** nonzero error code! (%d) ***' % rcode
    print ' qdcount=%d, ancount=%d, nscount=%d, arcount=%d' \
          % (qdcount, ancount, nscount, arcount)
    # section counts from the header drive how many entries to dump
    for i in range(qdcount):
        print 'QUESTION %d:' % i,
        dumpQ(u)
    for i in range(ancount):
        print 'ANSWER %d:' % i,
        dumpRR(u)
    for i in range(nscount):
        print 'AUTHORITY RECORD %d:' % i,
        dumpRR(u)
    for i in range(arcount):
        print 'ADDITIONAL RECORD %d:' % i,
        dumpRR(u)
class DnsResult:
def __init__(self,u,args):
self.header={}
self.questions=[]
self.answers=[]
self.authority=[]
self.additional=[]
self.args=args
self.storeM(u)
def show(self):
import time
print '; <<>> PDG.py 1.0 <<>> %s %s'%(self.args['name'],
self.args['qtype'])
opt=""
if self.args['rd']:
opt=opt+'recurs '
h=self.header
print ';; options: '+opt
print ';; got answer:'
print ';; ->>HEADER<<- opcode %s, status %s, id %d'%(
h['opcode'],h['status'],h['id'])
flags=filter(lambda x,h=h:h[x],('qr','aa','rd','ra','tc'))
print ';; flags: %s; Ques: %d, Ans: %d, Auth: %d, Addit: %d'%(
string.join(flags),h['qdcount'],h['ancount'],h['nscount'],
h['arcount'])
print ';; QUESTIONS:'
for q in self.questions:
print ';; %s, type = %s, class = %s'%(q['qname'],q['qtypestr'],
q['qclassstr'])
print
print ';; ANSWERS:'
for a in self.answers:
print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'],
a['data'])
print
print ';; AUTHORITY RECORDS:'
for a in self.authority:
print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'],
a['data'])
print
print ';; ADDITIONAL RECORDS:'
for a in self.additional:
print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'],
a['data'])
print
if self.args.has_key('elapsed'):
print ';; Total query time: %d msec'%self.args['elapsed']
print ';; To SERVER: %s'%(self.args['server'])
print ';; WHEN: %s'%time.ctime(time.time())
def storeM(self,u):
(self.header['id'], self.header['qr'], self.header['opcode'],
self.header['aa'], self.header['tc'], self.header['rd'],
self.header['ra'], self.header['z'], self.header['rcode'],
self.header['qdcount'], self.header['ancount'],
self.header['nscount'], self.header['arcount']) = u.getHeader()
self.header['opcodestr']=DNS.Opcode.opcodestr(self.header['opcode'])
self.header['status']=DNS.Status.statusstr(self.header['rcode'])
for i in range(self.header['qdcount']):
#print 'QUESTION %d:' % i,
self.questions.append(self.storeQ(u))
for i in range(self.header['ancount']):
#print 'ANSWER %d:' % i,
self.answers.append(self.storeRR(u))
for i in range(self.header['nscount']):
#print 'AUTHORITY RECORD %d:' % i,
self.authority.append(self.storeRR(u))
for i in range(self.header['arcount']):
#print 'ADDITIONAL RECORD %d:' % i,
self.additional.append(self.storeRR(u))
def storeQ(self,u):
q={}
q['qname'], q['qtype'], q['qclass'] = u.getQuestion()
q['qtypestr']=DNS.Type.typestr(q['qtype'])
q['qclassstr']=DNS.Class.classstr(q['qclass'])
return q
def storeRR(self,u):
r={}
r['name'],r['type'],r['class'],r['ttl'],r['rdlength'] = u.getRRheader()
r['typename'] = DNS.Type.typestr(r['type'])
r['classstr'] = DNS.Class.classstr(r['class'])
#print 'name=%s, type=%d(%s), class=%d(%s), ttl=%d' \
# % (name,
# type, typename,
# klass, DNS.Class.classstr(class),
# ttl)
mname = 'get%sdata' % r['typename']
if hasattr(u, mname):
r['data']=getattr(u, mname)()
else:
r['data']=u.getbytes(rdlength)
return r
def dumpQ(u):
    """Print the next question entry from unpacker u."""
    qname, qtype, qclass = u.getQuestion()
    print 'qname=%s, qtype=%d(%s), qclass=%d(%s)' \
          % (qname,
             qtype, DNS.Type.typestr(qtype),
             qclass, DNS.Class.classstr(qclass))
def dumpRR(u):
    """Print the next resource record from unpacker u, using the typed
    get<TYPE>data() helper when one exists."""
    name, type, klass, ttl, rdlength = u.getRRheader()
    typename = DNS.Type.typestr(type)
    print 'name=%s, type=%d(%s), class=%d(%s), ttl=%d' \
          % (name,
             type, typename,
             klass, DNS.Class.classstr(klass),
             ttl)
    mname = 'get%sdata' % typename
    if hasattr(u, mname):
        print ' formatted rdata:', getattr(u, mname)()
    else:
        print ' binary rdata:', u.getbytes(rdlength)

16
DNS/Opcode.py Normal file
View file

@ -0,0 +1,16 @@
# Opcode values in message header (section 4.1.1)
QUERY = 0
IQUERY = 1
STATUS = 2
# Construct reverse mapping dictionary
# dir() at this point lists every name bound so far; the public names are
# all opcode constants, so eval(_name) recovers each numeric value.
_names = dir()
opcodemap = {}
for _name in _names:
    if _name[0] != '_': opcodemap[eval(_name)] = _name
def opcodestr(opcode):
    # symbolic name for an opcode; falls back to repr for unknown values
    if opcodemap.has_key(opcode): return opcodemap[opcode]
    else: return `opcode`

19
DNS/Status.py Normal file
View file

@ -0,0 +1,19 @@
# Status values in message header
NOERROR = 0
FORMERR = 1
SERVFAIL = 2
NXDOMAIN = 3
NOTIMP = 4
REFUSED = 5
# Construct reverse mapping dictionary
# dir() at this point lists every name bound so far; the public names are
# all status constants, so eval(_name) recovers each numeric value.
_names = dir()
statusmap = {}
for _name in _names:
    if _name[0] != '_': statusmap[eval(_name)] = _name
def statusstr(status):
    # symbolic name for a status code; falls back to repr for unknown values
    if statusmap.has_key(status): return statusmap[status]
    else: return `status`

42
DNS/Type.py Normal file
View file

@ -0,0 +1,42 @@
# TYPE values (section 3.2.2)
A = 1           # a host address
NS = 2          # an authoritative name server
MD = 3          # a mail destination (Obsolete - use MX)
MF = 4          # a mail forwarder (Obsolete - use MX)
CNAME = 5       # the canonical name for an alias
SOA = 6         # marks the start of a zone of authority
MB = 7          # a mailbox domain name (EXPERIMENTAL)
MG = 8          # a mail group member (EXPERIMENTAL)
MR = 9          # a mail rename domain name (EXPERIMENTAL)
NULL = 10       # a null RR (EXPERIMENTAL)
WKS = 11        # a well known service description
PTR = 12        # a domain name pointer
HINFO = 13      # host information
MINFO = 14      # mailbox or mail list information
MX = 15         # mail exchange
TXT = 16        # text strings
AAAA = 28       # IPv6 AAAA records (RFC 1886)
# Additional TYPE values from host.c source
UNAME = 110
MP = 240
# QTYPE values (section 3.2.3)
AXFR = 252      # A request for a transfer of an entire zone
MAILB = 253     # A request for mailbox-related records (MB, MG or MR)
MAILA = 254     # A request for mail agent RRs (Obsolete - see MX)
ANY = 255       # A request for all records
# Construct reverse mapping dictionary
# dir() at this point lists every name bound so far; the public names are
# all TYPE constants, so eval(_name) recovers each numeric value.
_names = dir()
typemap = {}
for _name in _names:
    if _name[0] != '_': typemap[eval(_name)] = _name
def typestr(type):
    # symbolic name for a TYPE value; falls back to repr for unknown codes
    if typemap.has_key(type): return typemap[type]
    else: return `type`

10
DNS/__init__.py Normal file
View file

@ -0,0 +1,10 @@
# __init__.py for DNS class.
# Package-wide string exception (pre-class-exception style).
Error='DNS API error'
# Implicit-relative (Python 2) imports of the package submodules.
import Type,Opcode,Status,Class
# Order matters: lazy depends on names defined in Base.
from Base import *
from Lib import *
from lazy import *
# Short public aliases for the main entry points.
Request = DnsRequest
Result = DnsResult

266
DNS/asyncore.py Normal file
View file

@ -0,0 +1,266 @@
# -*- Mode: Python; tab-width: 4 -*-
# $Id$
# Author: Sam Rushing <rushing@nightmare.com>
# A simple unix version of the asynchronous socket support.
# There are lots of problems with this still - I only wrote it to show
# that it could be done, and for my own testing purposes.
# [960206: servtest, asynfing, asynhttp, and pop3demo work, asyndns doesn't.]
# [960321: servtest, asynfing, asynhttp, pop3demo, pop3_2 work]
import select
import socket
import sys
# you need to generate ERRNO.py from Tools/scripts/h2py.py in the Python
# distribution.
try:
    import ERRNO
except ImportError:
    raise ImportError,'you need to generate ERRNO.py from Tools/scripts/h2py.py in the Python distribution'
# look what I can get away with... 8^)
# The global fileno -> dispatcher registry is stashed on the socket module
# itself so every importer shares a single channel map.
socket.socket_map = {}
ALL_EVENTS = []
DEFAULT_TIMEOUT = 30.0
loop_running = 0
# string exception (pre-class-exception style) used to break out of loop()
stop_loop_exception = "stop running the select loop"
# we want to select for read only those sockets
# to which we are already connected to, -OR- those
# sockets we are accepting on.
def readables (sock_fds):
    """Filter sock_fds down to the descriptors worth selecting for read."""
    sm = socket.socket_map
    # the sm=sm default binds the map now (pre-nested-scope idiom)
    def readable_test (fd, sm=sm):
        sock = sm[fd]
        return sock.connected or sock.accepting
    return filter (readable_test, sock_fds)
# only those fd's we are 'write blocked' on, -OR-
# those sockets we are waiting for a connection on.
def writables (sock_fds):
    """Filter sock_fds down to the descriptors worth selecting for write."""
    sm = socket.socket_map
    def writable_test (fd, sm=sm):
        sock = sm[fd]
        return sock.write_blocked or not sock.connected
    return filter (writable_test, sock_fds)
def loop(timeout=DEFAULT_TIMEOUT):
loop_running = 1
try:
while 1:
sock_fds = socket.socket_map.keys()
read_fds = readables (sock_fds)
write_fds = writables (sock_fds)
expt_fds = sock_fds[:]
(read_fds,
write_fds,
expt_fds) = select.select (read_fds,
write_fds,
expt_fds,
timeout)
print read_fds,write_fds,expt_fds
try:
for x in expt_fds:
socket.socket_map[x].handle_expt_event()
for x in read_fds:
socket.socket_map[x].handle_read_event()
for x in write_fds:
socket.socket_map[x].handle_write_event()
except KeyError:
# handle_expt handle_read might remove as socket
# from the map by calling self.close().
pass
except stop_loop_exception:
print 'loop stopped'
class dispatcher:
def __init__ (self, sock=None):
self.debug = 0
self.log_queue = []
self.connected = 0
self.accepting = 0
self.write_blocked = 1
if sock:
self.socket = sock
self.fileno = self.socket.fileno()
# I think it should inherit this anyway
self.socket.setblocking (0)
self.connected = 1
self.add_channel()
def add_channel (self, events=ALL_EVENTS):
self.log ('adding channel %s' % self)
socket.socket_map [self.fileno] = self
def del_channel (self):
if socket.socket_map.has_key (self.fileno):
del socket.socket_map [self.fileno]
if not len(socket.socket_map.keys()):
raise stop_loop_exception
def create_socket (self, family, type):
self.socket = socket.socket (family, type)
self.socket.setblocking(0)
self.fileno = self.socket.fileno()
self.add_channel()
def bind (self, *args):
return apply (self.socket.bind, args)
def go (self):
if not loop_running:
loop()
def listen (self, num):
self.accepting = 1
self.socket.listen (num)
def accept (self):
return self.socket.accept()
def connect (self, host, port):
try:
self.socket.connect (host, port)
except socket.error, why:
if type(why) == type(()) \
and why[0] in (ERRNO.EINPROGRESS, ERRNO.EALREADY, ERRNO.EWOULDBLOCK):
return
else:
raise socket.error, why
self.connected = 1
self.handle_connect()
def send (self, data):
try:
result = self.socket.send (data)
if result != len(data):
self.write_blocked = 1
else:
self.write_blocked = 0
return result
except socket.error, why:
if type(why) == type(()) and why[0] == ERRNO.EWOULDBLOCK:
self.write_blocked = 1
return 0
else:
raise socket.error, why
return 0
def recv (self, buffer_size):
data = self.socket.recv (buffer_size)
if not data:
self.handle_close()
return ''
else:
return data
def close (self):
self.socket.close()
self.del_channel()
def shutdown (self, how):
self.socket.shutdown (how)
def log (self, message):
#self.log_queue.append ('%s:%d %s' %
# (self.__class__.__name__, self.fileno, message))
print 'log:', message
def done (self):
self.print_log()
def print_log (self):
for x in self.log_queue:
print x
def handle_read_event (self):
# getting a read implies that we are connected
if not self.connected:
self.handle_connect()
self.connected = 1
self.handle_read()
elif self.accepting:
if not self.connected:
self.connected = 1
self.handle_accept()
else:
self.handle_read()
def more_to_send (self, yesno=1):
self.write_blocked = yesno
def handle_write_event(self):
    # getting a write event implies that we are connected
    # (comment fixed: the original said "read", copied from
    # handle_read_event above)
    if not self.connected:
        self.handle_connect()
        self.connected = 1
    # the kernel says we can write again, so clear the blocked flag
    self.write_blocked = 0
    self.handle_write()
def handle_expt_event(self):
    """An exceptional condition on the socket is treated as an error."""
    self.handle_error()
def handle_error(self, error=0):
    """Default error handling: simply close the channel.
    The error code is accepted but ignored."""
    self.close()
# Default event handlers.  Subclasses override the ones they care
# about; each stub only logs that the event arrived unhandled.
def handle_read(self):
    self.log ('unhandled FD_READ')
def handle_write(self):
    self.log ('unhandled FD_WRITE')
def handle_connect(self):
    self.log ('unhandled FD_CONNECT')
def handle_oob(self):
    # out-of-band (urgent) data
    self.log ('unhandled FD_OOB')
def handle_accept(self):
    self.log ('unhandled FD_ACCEPT')
def handle_close(self):
    self.log ('unhandled FD_CLOSE')
def handle_disconnect(self, error):
    self.log ('unexpected disconnect, error:%d' % error)
# ---------------------------------------------------------------------------
# adds async send capability, useful for simple clients.
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# adds async send capability, useful for simple clients.
# ---------------------------------------------------------------------------
class dispatcher_with_send(dispatcher):
    """A dispatcher that buffers outgoing data and drains it as the
    socket becomes writable.

    Bug fixes versus the original:
      * send() now APPENDS to out_buffer instead of overwriting it,
        so data queued while a previous payload is still unsent is
        no longer silently dropped;
      * initiate_send() breaks out of its loop when the underlying
        send() reports 0 bytes written (EWOULDBLOCK) -- previously it
        spun forever on a blocked socket because the buffer never
        shrank.
    """
    def __init__(self, sock=None):
        dispatcher.__init__(self, sock)
        self.out_buffer = ''
    def initiate_send(self):
        # push out at most 512 bytes per send() call
        while self.out_buffer:
            num_sent = dispatcher.send(self, self.out_buffer[:512])
            if not num_sent:
                # socket would block; handle_write() will retry later
                break
            self.out_buffer = self.out_buffer[num_sent:]
    def handle_write(self):
        self.initiate_send()
    def send(self, data):
        if self.debug:
            self.log ('sending %s' % repr(data))
        # append, don't overwrite: keep any still-unsent data
        self.out_buffer = self.out_buffer + data
        self.initiate_send()
# ---------------------------------------------------------------------------
# used a lot when debugging
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# used a lot when debugging
# ---------------------------------------------------------------------------
def close_all():
    """Close every registered channel and reset the global socket map."""
    for fd, channel in socket.socket_map.items():
        channel.socket.close()
    socket.socket_map = {}

24
DNS/lazy.py Normal file
View file

@ -0,0 +1,24 @@
# $Id$
# routines for lazy people.
import Base
def revlookup(name):
    "convenience routine for doing a reverse lookup of an address"
    import string
    octets = string.split(name, '.')
    octets.reverse()
    query = string.join(octets, '.') + '.in-addr.arpa'
    # this will only return one of any records returned.
    return Base.DnsRequest(query, qtype = 'ptr').req().answers[0]['data']
def mxlookup(name):
    """
    convenience routine for doing an MX lookup of a name. returns a
    sorted list of (preference, mail exchanger) records
    """
    answers = Base.DnsRequest(name, qtype = 'mx').req().answers
    records = []
    for answer in answers:
        records.append(answer['data'])
    records.sort()
    return records

43
GML/GMLLexer.py Normal file
View file

@ -0,0 +1,43 @@
import sys,re
import PyLR
def _intfunc(m):
return int(m.group(0))
def _realfunc(m):
return float(m.group(0))
class GMLLexer(PyLR.Lexer):
    """The GML lexical scanner."""
    def __init__(self):
        PyLR.Lexer.__init__(self)
        # Pattern registration order matters: REAL must be tried before
        # INT so that "1.5" is not split into INT '1' plus garbage.
        self.addpat(r"[-+]?(\d+\.\d*|\d*\.\d+)([Ee][-+]?\d+)?",
                    "REAL", _realfunc)
        self.addpat(r"[-+]?\d+", "INT", _intfunc)
        self.addpat(r"\[", "LSQB")
        self.addpat(r"\]", "RSQB")
        # GML strings forbid raw '&' and '"'; entities like &amp; are ok
        self.addpat(r'"([^&"]+|&[a-zA-Z]+;)*"', "STRING")
        self.addpat(r"[a-zA-Z][a-zA-Z0-9]*", "KEY")
        # '#' comments and whitespace are skipped, not tokenized
        self.addpat(r"#[^\n]*", "", None, PyLR.SKIPTOK)
        self.addpat(r"\s+", "", None, PyLR.SKIPTOK)
def _test():
    # Smoke test: tokenize a small GML document until the lexer
    # reports end of input (falsy token).
    gmltest = """# a graph example
graph [ # comment at end of line
node [
real1 1.e3
real2 .01
int1 00050
label "Wallerfang&amp;Ballern"
]
]
"""
    # create the lexer
    lexer = GMLLexer()
    lexer.settext(gmltest)
    tok=1
    while tok:
        tok, val = lexer.scan(1)
if __name__ == '__main__':
    _test()

45
GML/grammarspec.txt Normal file
View file

@ -0,0 +1,45 @@
# a GML parser
# Here is the GML grammar
# corrected by me because the original at
# http://www.uni-passau.de/Graphlet/GML had some errors
#
# corrections are
# (1) use instring* in string
# (2) add character,lowercase,uppercase definitions
# (3) skip whitespace definition, this is obvious
# (4) use digit+ in mantissa
# (5) either intpart or fraction of a real must contain a number
# (6) comments can be on a separate line or at the end of a line
#
# gml: list
# list: (whitespace* key whitespace+ value)*
# value: integer | real | string | "[" list "]"
# key: character (character | digit)*
# integer: sign digit+
# real: sign (digit+ "." digit* | digit* "." digit+) mantissa
# string: """ instring* """
# sign: "+" | "-" |
# digit: "0"..."9"
# character: lowercase | uppercase
# lowercase: "a"..."z"
# uppercase: "A"..."Z"
# mantissa: ("E"|"e") sign digit+ |
# instring: <ASCII except "&" and """> | "&" character+ ";"
#
# Note that integers and reals can have prefixed zeros, e.g. 001 is 1
_class GMLParser
_code import GMLLexer
_lex GMLLexer.GMLLexer()
# manually reduced
"""
list: list KEY value (key_value) |
(endoflist) ;
value: INTEGER |
REAL |
STRING |
LSQB list RSQB (beginlist) ;
"""

28
INSTALL Normal file
View file

@ -0,0 +1,28 @@
LinkChecker installation
==========================
First, decompress the archive.
With linkchecker-x.x.x.tar.bz2 do "tar xIvf linkchecker-x.x.x.tar.bz2".
With linkchecker-x.x.x.zip do "unzip linkchecker-x.x.x.zip" or use Winzip.
With linkchecker-x.x.x.deb do "dpkg -i linkchecker-x.x.x.deb" as root and you
are done.
Unix Users:
1. Edit the file linkchecker.
Adjust the argument to sys.path.append to point to the distribution
directory.
2. Copy linkchecker to a location in your PATH (or make a symlink).
3. Check links happily by typing `linkchecker`.
Windows Users:
1. Edit the file linkchecker.
Adjust the argument to sys.path.append to point to the distribution
directory.
2. Edit the file linkchecker.bat.
a) Adjust the PYTHON variable to point to python.exe.
b) Adjust the LINKCHECKER variable to point to the distribution directory.
3. Add the distribution directory to your PATH.
4. Check links happily by typing `linkchecker.bat`.
You need Python >= 1.5.2
You get Python from http://www.python.org

339
LICENSE Normal file
View file

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
675 Mass Ave, Cambridge, MA 02139, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) 19yy <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) 19yy name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

53
Makefile Normal file
View file

@ -0,0 +1,53 @@
VERSION=0.9.0
HOST=treasure.calvinsplayground.de
#HOST=fsinfo.cs.uni-sb.de
PACKAGE = linkchecker
BZ2PACKAGE = $(PACKAGE)-$(VERSION).tar.bz2
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
ZIPPACKAGE = $(PACKAGE)-$(VERSION).zip
ALLPACKAGES = ../$(BZ2PACKAGE) ../$(DEBPACKAGE) ../$(ZIPPACKAGE)
.PHONY: test clean files install all
TAR = tar
ZIP = zip
prefix = /usr/local
all:
# remove generated archives and checker output
clean:
	rm -f $(ALLPACKAGES) $(PACKAGE)-out.*
# run the checker over $(HOST) producing all output formats
files: all
	./$(PACKAGE) -q -Wtext -Whtml -Wgml -Wsql -R -r2 -v -i "$(HOST)" http://$(HOST)/~calvin/
install: install-dirs
	install -m644 linkcheck/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/linkcheck
	install -m644 DNS/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/DNS
	install -m644 *.py? $(DESTDIR)/usr/share/$(PACKAGE)
	install -m755 $(PACKAGE) $(DESTDIR)/usr/bin
	install -m644 $(PACKAGE)rc $(DESTDIR)/etc
install-dirs:
	install -d -m755 \
	$(DESTDIR)/usr/share/$(PACKAGE)/linkcheck \
	$(DESTDIR)/usr/share/$(PACKAGE)/DNS \
	$(DESTDIR)/usr/share/$(PACKAGE)/GML \
	$(DESTDIR)/usr/share/$(PACKAGE)/PyLR \
	$(DESTDIR)/usr/bin \
	$(DESTDIR)/etc
# build source archives (.tar.bz2, .zip) and the Debian package
dist: files
	dh_clean
	cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE)
	cd .. && $(ZIP) -r $(ZIPPACKAGE) $(PACKAGE)
	fakeroot debian/rules binary
package:
	cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE)
# run the checker against each test/*.html fixture, keeping results
test:
	rm -f test/*.result
	@for i in test/*.html; do \
	echo "Testing $$i. Results are in $$i.result"; \
	./$(PACKAGE) -v -a $$i > $$i.result 2>&1; \
	done

853
PyLR/Grammar.py Normal file
View file

@ -0,0 +1,853 @@
__version__ = "$Id$"
import time,string,types,parsertemplate
class PyLRParseError(ParseError):
    # NOTE(review): `ParseError` is not defined by this module's imports
    # (time, string, types, parsertemplate); unless parsertemplate
    # injects it into the global namespace this raises NameError at
    # import time -- verify (probably meant parsertemplate.ParseError).
    pass
class Production:
    """Production -- a Grammar is really just a list of productions.

    Holds an LHS symbol and a list of RHS symbols; `funcname`/`func`
    name and hold the semantic action attached to the production.
    """
    def __init__(self, LHS, RHS, funcname="unspecified"):
        self.LHS = LHS
        self.RHS = RHS
        self.funcname = funcname
        self.func = None  # assigned dynamically via setfunc()
        self.toklist = None
    def setfunc(self, func):
        """Attach a callable semantic action, used when a parse engine
        is built on the fly by Grammar.mkengine() rather than from
        tables saved to a file."""
        self.func = func
    def setfuncname(self, name):
        """Record the action's name; used by Grammar.writefile to emit
        the prodinfo table, so the function can be re-associated at
        runtime."""
        self.funcname = name
    def __len__(self):
        # length of a production == number of RHS symbols
        return len(self.RHS)
    def __repr__(self):
        return self.getrep()
    def getrep(self, toklist=None):
        """Readable form 'LHS: rhs1 rhs2 (funcname)'; integer RHS
        entries are resolved through `toklist` when one is given."""
        rep = self.LHS + ":"
        for sym in self.RHS:
            if type(sym) == types.IntType and toklist:
                rep = rep + " " + toklist[sym]
            else:
                rep = rep + " " + str(sym)
        if self.funcname:
            rep = rep + " (" + self.funcname + ")"
        return rep
    def items(self):
        # dot positions 0..len(RHS) for LR item construction
        return range(len(self.RHS) + 1)
class LR1Grammar:
"""Provides methods for producing the actiontable, the gototable, and the
prodinfo table. Using these functions, it can produce a python source
code file with these tables or a parsing engine.
Note that we assume the first production (productions[0]) to be the start
symbol."""
EPS = "<EPS>"
EOF = "<EOF>"
DummyLA = -1
def __init__(self, productions, tokens=[], verbose=0):
self.verbose = verbose
self.productions = productions
self.tokens = tokens
self.nonterminals = []
for p in self.productions:
if p.LHS not in self.nonterminals:
self.nonterminals.append(p.LHS)
if self.verbose:
print "Nonterminals:", self.nonterminals
self.terminals = []
for p in self.productions:
for s in p.RHS:
if not (s in self.terminals or s in self.nonterminals):
self.terminals.append(s)
self.terminals.sort()
if self.verbose:
print "Terminals:", self.terminals
# reduce the grammar
self._reduceGrammar()
# build map with productions who have the same LHS
self.lhsprods = {}
for lhs in self.nonterminals:
self.lhsprods[lhs] = filter(lambda x,l=lhs: x.LHS==l, self.productions)
# immediate epsilon productions
pi = 1
self.epslhs = {}
for p in self.productions:
if p.RHS == []:
self.epslhs[p.LHS] = pi
pi = pi + 1
# derived epsilon productions
self.lhsdereps = self._mklhsdereps()
# the FIRST function for the LR(1) grammar, implemented as a map
self.firstmap = self._mkfirstmap()
def _reduceGrammar(self):
    """Remove useless nonterminals from the grammar.

    Definitions:
      (1) not productive: a nonterminal A is not productive iff there
          is no word u with A ==>* u (A derives no terminal string).
      (2) not reachable: A is not reachable iff there are no words
          a,b with S ==>* aAb (A never occurs in any parse tree).

    Raises PyLRParseError when the start symbol itself is not
    productive (the grammar generates nothing).

    Bug fixes versus the original:
      * productive_nts is a list of LHS *symbols*; the original did
        `productive_nts[p] = 1` (indexing a list with a Production --
        TypeError) and appended Production objects to the worklist
        where symbols were needed;
      * reachable_nts was initialised to a Production object instead
        of a list of symbols.

    References: [R. Wilhelm, D. Maurer: "Uebersetzerbau", p. 300f]
    """
    # --- productive nonterminals ---
    productive_nts = []
    # rest_nt[p] == number of nonterminals in p.RHS not yet known to
    # be productive; when it reaches 0, p's LHS is productive
    rest_nt = {}
    # worklist of newly found productive nonterminals whose uses must
    # be re-inspected
    workedon_nts = []
    # seed: productions whose RHS holds no nonterminals at all
    # (including epsilon productions) are immediately productive
    for p in self.productions:
        rest_nt[p] = len(filter(lambda x, s=self: x in s.nonterminals, p.RHS))
        if rest_nt[p] == 0:
            if p.LHS not in productive_nts:
                productive_nts.append(p.LHS)
                workedon_nts.append(p.LHS)
    # propagate productivity through the worklist
    while len(workedon_nts):
        x = workedon_nts[0]
        for p in filter(lambda p, _x=x: _x in p.RHS, self.productions):
            rest_nt[p] = rest_nt[p] - 1
            if rest_nt[p] == 0 and p.LHS not in productive_nts:
                productive_nts.append(p.LHS)
                workedon_nts.append(p.LHS)
        workedon_nts.remove(x)
    if not self.productions[0].LHS in productive_nts:
        raise PyLRParseError("start symbol of grammar is not productive")
    # --- reachable nonterminals (start symbol is trivially reachable) ---
    reachable_nts = [self.productions[0].LHS]
    added = 1
    while added:
        added = 0
        for p in self.productions:
            for r in p.RHS:
                if p.LHS in reachable_nts and (r in self.nonterminals and
                                               r not in reachable_nts):
                    reachable_nts.append(r)
                    added = 1
    # reduce the grammar
    # NOTE(review): keeping productions whose LHS is productive OR
    # reachable matches the original; strictly, "useless" removal
    # would require AND -- confirm against upstream PyLR.
    self.productions = filter(lambda p,
                              pnt=productive_nts,
                              rnt=reachable_nts: p.LHS in pnt or p.LHS in rnt,
                              self.productions)
def __repr__(self):
    """Render all productions, one per line, each terminated by ';'."""
    reps = []
    for p in self.productions:
        reps.append(p.getrep(self.tokens))
    return string.join(reps, ";\n") + ";"
def _mklhsdereps(self):
    """determines the nonterminals that derive nothing (epsilon)

    Returns a map nonterminal -> production index of the (transitive)
    epsilon production it collapses to.
    """
    # seed with the immediate epsilon productions
    # (pi assumed to be a 1-based production counter -- TODO confirm
    # nesting, source indentation was lost)
    pi = 1
    res = {}
    for p in self.productions:
        if p.RHS == []:
            res[p.LHS] = pi
        pi = pi + 1
    # everything not yet known forms the work set
    workingnonterms = []
    for nt in self.nonterminals:
        if not res.has_key(nt):
            workingnonterms.append(nt)
    # fixpoint: A derives epsilon if A -> B and B already does
    while 1:
        toremove = []
        for nt in workingnonterms:
            if not res.has_key(nt):
                for p in self.lhsprods[nt]:
                    if len(p.RHS) == 1 and res.has_key(p.RHS[0]):
                        res[p.LHS] = res[p.RHS[0]]
                        toremove.append(nt)
                        break
        if not toremove:
            break
        for r in toremove:
            workingnonterms.remove(r)
    return res
def _mkfirstmap(self):
    """return a dictionary keyed by symbol whose values are the set
    of terminals that can precede that symbol

    Bug fix: the original referenced `Grammar.EPS` etc., but no name
    `Grammar` exists in this module (the class is LR1Grammar), which
    raised NameError; the class constants are now referenced through
    LR1Grammar.
    """
    res = {}
    # terminals (and the pseudo-symbols) are their own FIRST set
    for sym in self.terminals + [LR1Grammar.EPS, LR1Grammar.EOF,
                                 LR1Grammar.DummyLA]:
        res[sym] = {sym: 1}
    # iterate to a fixpoint over the nonterminals
    added = 1
    while added:
        added = 0
        for nt in self.nonterminals:
            firsts = res.get(nt, {})
            for p in self.lhsprods[nt]:
                if not p.RHS:
                    # epsilon production contributes EPS
                    if not firsts.has_key(LR1Grammar.EPS):
                        added = firsts[LR1Grammar.EPS] = 1
                for i in range(len(p.RHS)):
                    f = res.get(p.RHS[i], {})
                    for t in f.keys():
                        if not firsts.has_key(t):
                            added = firsts[t] = 1
                    # only look past RHS[i] if it can derive epsilon
                    if not self.lhsdereps.has_key(p.RHS[i]):
                        break
            res[nt] = firsts
    # flatten the sets (dicts used as sets) into lists
    for s in res.keys():
        res[s] = res[s].keys()
    return res
# these function are used as the grammar produces the tables (or writes
# them to a file)
# these function are used as the grammar produces the tables (or writes
# them to a file)
def firstofstring(self, gs_list):
    """FIRST set of a string of grammar symbols: the union of the
    FIRST sets of its prefix symbols, up to and including the first
    one that cannot derive epsilon; contains EPS only when every
    symbol in the string can derive epsilon.

    Bug fix: `Grammar.EPS` referenced an undefined name (the class is
    LR1Grammar); now referenced through LR1Grammar.
    """
    tmpres = {}
    allhaveeps = 1
    for x in range(len(gs_list)):
        tmp = self.firstmap[gs_list[x]]
        for s in tmp:
            tmpres[s] = 1
        if LR1Grammar.EPS in tmp:
            # this symbol is nullable; keep scanning, but EPS itself
            # only survives if ALL symbols are nullable
            del tmpres[LR1Grammar.EPS]
        else:
            allhaveeps = 0
            break
    if allhaveeps:
        tmpres[LR1Grammar.EPS] = 1
    return tmpres.keys()
def augment(self):
    """this function adds a production S' -> S to the grammar where S
    was the start symbol.  The new start symbol is the old one with as
    many apostrophes appended as needed to make it fresh."""
    lhss = []
    for p in self.productions:
        lhss.append(p.LHS)
    newsym = self.productions[0].LHS + "'"
    while newsym in lhss:
        newsym = newsym + "'"
    self.productions.insert(0, Production(newsym,
                                          [self.productions[0].LHS]))
# follow is not used yet, but probably will be in determining error reporting/recovery
def follow(self):
    """Compute FOLLOW sets for the nonterminals.

    Returns a dict mapping nonterminal -> list of terminals that can
    appear immediately after it in some sentential form; the start
    symbol's FOLLOW always contains EOF.  Intended for error
    reporting/recovery (see comment above in the original file).

    Bug fix: the previous version did
        f = self.firstmap[p.RHS[c]]
        f.remove(Grammar.EPS)
    which stripped EPS from the list stored *inside* self.firstmap,
    permanently corrupting the FIRST sets for every later caller
    (closure/firstofstring run after follow() in LALRGrammar.__init__).
    The list is now copied before EPS is removed.
    """
    eof = Grammar.EOF
    follow = {}
    startsym = self.productions[0].LHS
    follow[startsym] = [eof]
    nts = self.nonterminals
    # pass 1: FOLLOW(X) gains FIRST(Y) for every occurrence "... X Y ..."
    for p in self.productions:
        cutoff = range(len(p.RHS))
        cutoff.reverse()
        for c in cutoff[:-1]: # all but the first of the RHS elements
            # copy before mutating -- self.firstmap must stay intact
            f = self.firstmap[p.RHS[c]][:]
            if Grammar.EPS in f:
                f.remove(Grammar.EPS)
            if follow.has_key(p.RHS[c - 1]):
                if p.RHS[c - 1] in nts:
                    follow[p.RHS[c - 1]] = follow[p.RHS[c - 1]] + f
            else:
                if p.RHS[c - 1] in nts:
                    follow[p.RHS[c - 1]] = f
    # pass 2: symbols at (or nullable-adjacent to) the end of an RHS
    # inherit FOLLOW(LHS)
    for p in self.productions:
        if not p.RHS: continue
        cutoff = range(len(p.RHS))
        cutoff.reverse()
        if p.RHS[-1] in nts:
            if follow.has_key(p.LHS):
                add = follow[p.LHS]
            else:
                add = []
            if follow.has_key(p.RHS[-1]):
                follow[p.RHS[-1]] = follow[p.RHS[-1]] + add
            else:
                follow[p.RHS[-1]] = add
        for c in cutoff[:-1]:
            # membership test only -- no mutation of the shared list here
            f = self.firstmap[p.RHS[c]]
            if Grammar.EPS in f:
                # RHS[c] can vanish, so RHS[c-1] also inherits FOLLOW(LHS)
                if follow.has_key(p.LHS):
                    add = follow[p.LHS]
                else:
                    add = []
                if follow.has_key(p.RHS[c-1]):
                    follow[p.RHS[c-1]] = follow[p.RHS[c-1]] + add
                elif add:
                    follow[p.RHS[c - 1]] = add
    # deduplicate each FOLLOW list via a marker dict
    for k in follow.keys():
        d = {}
        for i in follow[k]:
            d[i] = 1
        follow[k] = d.keys()
    return follow
def closure(self, items):
    """LR(1) closure of an item set.

    Items are ((production index, dot position), lookahead terminal)
    pairs.  For each item [A -> a . B b, t], every production B -> g
    contributes [B -> . g, x] for each x in FIRST(b t); repeated to a
    fixed point.

    Improvements: `newpart` (the symbol after B, invariant over the
    inner production loop) is computed once per item, and the linear
    scan self.productions.index(p) is computed once per production
    instead of up to three times.
    """
    res = items[:]
    todo = items[:]
    more = 1
    while more:
        more = []
        for (prodind, rhsind), term in todo:
            if rhsind >= len(self.productions[prodind].RHS):
                continue  # dot at the end: nothing to expand
            # symbol following the dotted symbol (EPS when at the end)
            try:
                newpart = self.productions[prodind].RHS[rhsind + 1]
            except IndexError:
                newpart = Grammar.EPS
            stringofsyms = [newpart, term]
            for p in self.lhsprods.get(self.productions[prodind].RHS[rhsind], []):
                pind = self.productions.index(p)  # hoisted linear scan
                for t in self.firstofstring(stringofsyms):
                    if ((pind, 0), t) not in res:
                        more.append(((pind, 0), t))
                if term == Grammar.EOF and newpart == Grammar.EPS:
                    if ((pind, 0), Grammar.EOF) not in res:
                        more.append(((pind, 0), Grammar.EOF))
        if more:
            # NOTE(review): as in the original, duplicates can enter
            # `more` within one pass (membership is checked against res
            # only); redundant but not incorrect for the callers here.
            res = res + more
            todo = more
    return res
def goto(self, items, sym):
    """LR(1) GOTO: advance the dot over sym in every item that has sym
    after its dot, then return the closure of the advanced items."""
    advanced = []
    for (prodind, rhsind), term in items:
        try:
            dotted = self.productions[prodind].RHS[rhsind]
        except IndexError:
            continue  # dot already at the end of the RHS
        if dotted == sym:
            moved = ((prodind, rhsind + 1), term)
            if moved not in advanced:
                advanced.append(moved)
    return self.closure(advanced)
def default_prodfunc(self):
    """for mkengine: fallback semantic action used when a production
    has no function of its own -- returns its first argument unchanged.
    """
    def _identity_action(*args):
        return args[0]
    return _identity_action
def prodinfotable(self):
    """returns a list of three pieces of info for each production:
    (length of the RHS, function associated with the production,
    index of the LHS in the nonterminal list).

    A production whose RHS is the single symbol EPS counts as length
    zero; productions with no function get the default identity
    action.
    """
    info = []
    for prod in self.productions:
        action = prod.func or self.default_prodfunc()
        if prod.RHS == [Grammar.EPS]:
            rhslen = 0
        else:
            rhslen = len(prod.RHS)
        info.append((rhslen, action, self.nonterminals.index(prod.LHS)))
    return info
class LALRGrammar(LR1Grammar):
    """LALR(1) table builder.

    Works from kernel items only: the helper maps built in __init__
    (ntfirstmap, tfirstmap, followmap) let goto transitions, shifts
    and epsilon reductions be computed without materializing full
    closures for every state.
    """
    def __init__(self, prods, toks=[]):
        # NOTE(review): mutable default argument toks=[] -- shared
        # across calls if ever mutated; confirm callers always pass toks.
        # NOTE(review): skips LR1Grammar.__init__ and calls
        # Grammar.__init__ directly -- confirm intentional.
        Grammar.__init__(self, prods, toks)
        self.LALRitems = []
        #
        # this is to help make epsilon productions work with kernel items
        # and to compute goto transitions from kernel
        print "computing ntfirsts..."
        self.ntfirstmap = self._mkntfirstmap()
        #
        # this is to help make shifts work with only kernel items
        print "computing tfirsts..."
        self.tfirstmap = self._mktfirstmap()
        #
        # another thing to help epsilon productions
        print "computing follows..."
        self.followmap = self.follow()
    def _mkntfirstmap(self):
        """computes all nonterms A, first of (strings n) such that some
        nonterminal B derives [A, n] in zero or more steps of (rightmost)
        derivation. used to help make epsilon productions quickly calculable.
        (B may == A)

        Result shape: res[B][A] = sorted FIRST of the trailing string n.
        """
        res = {}
        # seed from productions whose RHS starts with a nonterminal
        for p in self.productions:
            if p.RHS and p.RHS[0] in self.nonterminals:
                fos = self.firstofstring(p.RHS[1:])
                fos.sort()
                if not res.has_key(p.LHS):
                    res[p.LHS] = {}
                if not res[p.LHS].has_key(p.RHS[0]):
                    res[p.LHS][p.RHS[0]] = []
                # NOTE(review): this replaces the accumulated list with
                # fos wholesale on the first missing element; entries
                # collected from an earlier production with the same
                # LHS/RHS[0] pair could be dropped -- confirm.
                for i in fos:
                    if i not in res[p.LHS].get(p.RHS[0], []):
                        res[p.LHS][p.RHS[0]] = fos
        # transitive closure: if B ->* A n1 and A ->* C n2 then
        # B ->* C (n2 n1); iterate until no set grows
        while 1:
            foundmore = 0
            reskeys = res.keys()
            for nt in reskeys:
                rhsdict = res[nt]
                for rnt in rhsdict.keys():
                    if rnt in reskeys:
                        d = res[rnt]
                        for k in d.keys():
                            if not res[nt].has_key(k):
                                fos = self.firstofstring(d[k]+ res[nt][rnt])
                                foundmore = 1
                                fos.sort()
                                res[nt][k] = fos
                            else:
                                fos = self.firstofstring(d[k] + res[nt][rnt])
                                fos.sort()
                                if fos != res[nt][k]: # then res[nt][k] is contained in fos
                                    foundmore = 1
                                    res[nt][k] = fos
            if not foundmore:
                break
        #
        # this part accounts for the fact that a nonterminal will
        # produce exactly itself in zero steps
        #
        for p in self.productions:
            if res.has_key(p.LHS):
                res[p.LHS][p.LHS] = [Grammar.EPS]
            else:
                res[p.LHS] = {p.LHS: [Grammar.EPS]}
        return res
    def newmkntfirstmap(self):
        """computes all nonterms A, first of (strings n) such that some
        nonterminal B derives [A, n] in zero or more steps of (rightmost)
        derivation. used to help make epsilon productions quickly calculable.
        (B may == A)

        Simplified variant of _mkntfirstmap that records only reachability
        (marker 1) instead of the FIRST of the trailing string.
        NOTE(review): appears unused by the rest of this file -- confirm.
        """
        res = {}
        pi = 0  # NOTE(review): unused local
        for p in self.productions:
            if p.RHS and p.RHS[0] in self.nonterminals:
                if not res.has_key(p.LHS):
                    res[p.LHS] = {}
                if not res[p.LHS].has_key(p.RHS[0]):
                    res[p.LHS][p.RHS[0]] = 1
        # transitive closure over the reachability relation
        while 1:
            foundmore = 0
            reskeys = res.keys()
            for nt in reskeys:
                rhsdict = res[nt]
                for rnt in rhsdict.keys():
                    if rnt in reskeys:
                        d = res[rnt]
                        for k in d.keys():
                            if not res[nt].has_key(k):
                                foundmore = 1
                                res[nt][k] = 1
            if not foundmore:
                break
        #
        # this part accounts for the fact that a nonterminal will
        # produce exactly itself in zero steps
        #
        for p in self.productions:
            if res.has_key(p.LHS):
                res[p.LHS][p.LHS] = 1
            else:
                res[p.LHS] = {p.LHS: 1}
        return res
    def _mktfirstmap(self):
        """for each nonterminal C, compute the set of all terminals a, such
        that C derives ax in zero or more steps of (rightmost) derivation
        where the last derivation is not an epsilon (empty) production.
        assumes .mkfirstntmap() has been run and has already produced
        self.ntfirstmap
        """
        res = {}
        # seed: productions that begin directly with a terminal
        for p in self.productions:
            if not res.has_key(p.LHS):
                res[p.LHS] = []
            if p.RHS and p.RHS[0] in self.terminals:
                res[p.LHS].append(p.RHS[0])
        # propagate through the nonterminal-derives relation
        while 1:
            foundmore = 0
            reskeys = res.keys()  # NOTE(review): unused local
            for nt in self.ntfirstmap.keys():
                arrows = self.ntfirstmap[nt]
                for k in arrows.keys():
                    for t in res[k]:
                        if t not in res[nt]:
                            foundmore = 1
                            res[nt].append(t)
            if not foundmore:
                break
        return res
    def goto(self, itemset, sym):
        """LALR goto over KERNEL items (overrides the LR(1) version):
        advances the dot over sym, and also pulls in the dot-after-first
        items of productions reachable through ntfirstmap, instead of
        running a full closure.  Returns a sorted list of (prodind,
        rhsind) pairs (no lookahead component)."""
        res = []
        for (pi, ri) in itemset:
            if ri == len(self.productions[pi].RHS):
                continue  # dot at the end: no transition from this item
            s = self.productions[pi].RHS[ri]
            if s == sym:
                res.append((pi, ri+1))
            # items introduced by closure: any production of a nonterminal
            # reachable from s whose RHS starts with sym also shifts
            d = self.ntfirstmap.get(s, {})
            for k in d.keys():
                for p in self.lhsprods[k]:
                    if p.RHS and p.RHS[0] == sym:
                        i = self.productions.index(p)
                        if (i, 1) not in res: res.append((i, 1))
        res.sort()
        return res
    def lookaheads(self, itemset):
        """For one kernel item set, determine spontaneously generated
        lookaheads and propagation links (the classic DummyLA technique).

        Returns (spontaneous, propagates) where spontaneous is a list of
        (target state, target item, terminal) and propagates maps a
        kernel item to the [(target state, target item), ...] its
        lookaheads propagate to.
        """
        setsofitems = kernels = self.kernelitems
        spontaneous = []
        propagates = {}
        gotomap = {}  # cache: symbol -> goto state index for this itemset
        for (kpi, kri) in itemset:
            # close the single kernel item under the sentinel lookahead
            C = self.closure([((kpi, kri), Grammar.DummyLA)])
            for (cpi, cri), t in C:
                if (cri) == len(self.productions[cpi].RHS):
                    continue  # complete item: no outgoing transition
                s = self.productions[cpi].RHS[cri]
                if gotomap.has_key(s):
                    newstate = gotomap[s]
                else:
                    newstate = setsofitems.index(self.goto(itemset, s))
                    gotomap[s] = newstate
                if t != Grammar.DummyLA:
                    # a concrete lookahead arose inside the closure:
                    # it is generated spontaneously for the goto item
                    spontaneous.append((newstate, (cpi, cri+1), t))
                else:
                    # the sentinel survived: lookaheads of the kernel
                    # item propagate to the goto item
                    if propagates.has_key((kpi, kri)):
                        propagates[(kpi, kri)].append((newstate, (cpi, cri+1)))
                    else:
                        propagates[(kpi, kri)]=[(newstate, (cpi, cri+1))]
        return spontaneous, propagates
    def kernelsoflalr1items(self):
        """Enumerate all kernel item sets (LR(0) kernels) reachable from
        the start kernel [(0, 0)], returned sorted.

        NOTE(review): new sets are checked against res but not against
        newtodo, so the same kernel discovered twice in one round could
        be added twice -- confirm whether grammars here can trigger it.
        """
        res = [[(0, 0)]]
        todo = [[(0, 0)]]
        while 1:
            newtodo = []
            for items in todo:
                for s in self.terminals + self.nonterminals + [Grammar.EOF]:
                    g = self.goto(items, s)
                    if g and g not in res:
                        newtodo.append(g)
            if not newtodo:
                break
            else:
                if self.verbose:
                    print "found %d more kernels" % (len(newtodo))
                res = res + newtodo
                todo = newtodo
        res.sort()
        return res
    def initLALR1items(self):
        """Build the kernels, an empty per-item lookahead table (seeded
        with EOF for the start item), and the propagation links.

        Returns (la_table, props): la_table[state][item index] is a list
        of lookahead terminals; props[state] is the propagation map from
        lookaheads() for that state's kernel.
        """
        self.kernelitems = kernels = self.kernelsoflalr1items()
        props = {}
        la_table = []
        # one (initially empty) lookahead list per kernel item
        for x in range(len(kernels)):
            la_table.append([])
            for y in range(len(kernels[x])):
                la_table[x].append([])
        la_table[0][0] = [Grammar.EOF]  # start item sees end-of-input
        if self.verbose:
            print "initLALR1items, kernels done, calculating propagations and spontaneous lookaheads"
        state_i = 0
        for itemset in kernels:
            if self.verbose:
                print ".",
            sp, pr = self.lookaheads(itemset)
            # record spontaneously generated lookaheads immediately
            for ns, (pi, ri), t in sp:
                inner = kernels[ns].index((pi, ri))
                la_table[ns][inner].append(t)
            props[state_i] = pr
            state_i = state_i + 1
        return la_table, props
    def LALR1items(self):
        """Run lookahead propagation to a fixed point, then reshape the
        result into self.LALRitems: a sorted list per state of
        ((prodind, rhsind), lookahead) pairs."""
        la_table, props = self.initLALR1items()
        if self.verbose:
            print "done init LALR1items"
        soi = self.kernelitems
        # propagate lookaheads along the links until nothing changes
        while 1:
            added_la = 0
            state_i = 0
            for state in la_table:
                ii = 0
                for propterms in state:
                    if not propterms:
                        ii = ii + 1
                        continue  # no lookaheads yet to propagate
                    item = soi[state_i][ii]
                    ii = ii + 1
                    try:
                        proplist = props[state_i][item]
                    except KeyError:
                        continue  # this item propagates nowhere
                    for pstate, pitem in proplist:
                        inner = soi[pstate].index(pitem)
                        for pt in propterms:
                            if pt not in la_table[pstate][inner]:
                                added_la = 1
                                la_table[pstate][inner].append(pt)
                state_i = state_i + 1
            if not added_la:
                break
        #
        # this section just reorganizes the above data
        # to the state it's used in later...
        #
        if self.verbose:
            print "done with lalr1items, reorganizing the data"
        res = []
        state_i = 0
        for state in soi:
            item_i = 0
            inner = []
            for item in state:
                for term in la_table[state_i][item_i]:
                    if (item, term) not in inner:
                        inner.append((item, term))
                item_i = item_i + 1
            inner.sort()
            res.append(inner)
            state_i = state_i + 1
        self.LALRitems = res
        return res
    def deriveN(self, nt1, nt2):
        """
        assuming nt1 -> nt2 <some string>, what is <some string>? such that
        we know it as 1) a set of terminals and 2) whether it contains
        Grammar.EPS

        NOTE(review): unimplemented stub.
        """
        pass
    def actiontable(self):
        """Build the ACTION table from self.LALRitems.

        res[state][terminal index] is ("s", state) for shift,
        ("r", prodind) for reduce, ("a", -1) for accept, or the error
        entry ("", -1).  Conflicts are reported on stdout; the later
        entry wins.
        """
        items = self.LALRitems
        res = []
        state_i = 0
        terms = self.terminals[:]
        terms.append(Grammar.EOF)
        errentry = ("", -1)
        for state in items:
            # NOTE(review): `list` shadows the builtin of the same name
            list = [errentry] * len(terms)
            res.append(list)
            for (prodind, rhsind), term in state:
                if (rhsind ) == len(self.productions[prodind].RHS):
                    # complete item: reduce, or accept for production 0
                    if prodind != 0:
                        new = ("r", prodind)
                        old = res[state_i][terms.index(term)]
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new
                        res[state_i][terms.index(term)] = new
                    else:
                        new = ("a", -1)
                        old = res[state_i][terms.index(term)]
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new
                        res[state_i][terms.index(term)] = new
                #
                # calculate reduction by epsilon productions
                #
                elif self.productions[prodind].RHS[rhsind] in self.nonterminals:
                    nt = self.productions[prodind].RHS[rhsind]
                    ntfirst = self.firstmap[nt]  # NOTE(review): unused local
                    ntfirsts = self.ntfirstmap.get(nt, {})
                    for k in ntfirsts.keys():
                        # epslhs presumably maps nullable nonterminals to
                        # their epsilon production index -- set outside
                        # this view; TODO confirm
                        if self.epslhs.get(k, ""):
                            reduceterms = self.followmap[k]
                            # print `((prodind, rhsind), term)`, reduceterms
                            for r in reduceterms:
                                inner = terms.index(r)
                                old = res[state_i][inner]
                                new = ("r", self.epslhs[k])
                                if old != errentry and old != new:
                                    print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
                                res[state_i][inner] = new
                    #
                    # calculate the shifts that occur but whose normal items aren't in the kernel
                    #
                    tfirsts = self.tfirstmap[nt]
                    for t in tfirsts:
                        inner = terms.index(t)
                        g = self.goto(self.kernelitems[state_i], t)
                        old = res[state_i][inner]
                        try:
                            news = self.kernelitems.index(g)
                        except ValueError:
                            continue  # goto target is not a known state
                        new = ("s", news)
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
                        res[state_i][inner] = new
                #
                # compute the rest of the shifts that occur 'normally' in the kernel
                #
                else:
                    t = self.productions[prodind].RHS[rhsind]
                    inner = self.terminals.index(t)
                    gt = self.goto(self.kernelitems[state_i], t)
                    if gt in self.kernelitems:
                        news = self.kernelitems.index(gt)
                        old = res[state_i][inner]
                        new = ("s", news)
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
                        res[state_i][inner] = new
            state_i = state_i + 1
        return res
    def gototable(self):
        """Build the GOTO table: res[state][nonterminal index] is the
        successor state index, or None when there is no transition."""
        items = self.kernelitems
        res = []
        state_i = 0
        nonterms = self.nonterminals
        err = None
        for state in items:
            # NOTE(review): `list` shadows the builtin of the same name
            list = [err] * len(nonterms)
            res.append(list)
            nonterm_i = 0
            for nt in nonterms:
                goto = self.goto(state, nt)
                if goto in items:
                    res[state_i][nonterm_i] = items.index(goto)
                nonterm_i = nonterm_i + 1
            state_i = state_i + 1
        return res
    def mkengine(self, inbufchunksize=None, stackchunksize=None):
        """dynamically will produce a parse engine, just an experiment,
        don't try to use it for anything real.
        """
        self.augment()
        self.LALR1items()
        at = self.actiontable()
        gt = self.gototable()
        self.productions = self.productions[1:] # unaugment
        pi = self.prodinfotable()
        if not inbufchunksize:
            inbufchunksize = 50
        if not stackchunksize:
            stackchunksize = 100
        e = PyLRengine.NewEngine(pi, at, gt, inbufchunksize, stackchunksize)
        return e
    def writefile(self, filename, parsername="MyParser", lexerinit = "PyLR.Lexer.Lexer()"):
        """Generate a standalone parser module at `filename` by filling
        in the parsertemplate docstring with the computed tables.
        Leaves self.productions un-augmented on return."""
        self.augment()
        print "About to start LALRitems at %d" % time.time()
        self.LALR1items()
        print "done building LALRitems at %d" % time.time()
        at = self.actiontable()
        print "done building actiontable at %d" % time.time()
        gt = self.gototable()
        print "done building gototable at %d" % time.time()
        self.productions = self.productions[1:]
        pi = self.prodinfotable()
        # the template text lives in the parsertemplate module docstring
        template = parsertemplate.__doc__
        vals = {"parsername": parsername, "lexerinit": lexerinit}
        vals["date"] = time.ctime(time.time())
        vals["filename"] = filename
        if not hasattr(self, "extrasource"):
            vals["extrasource"] = ""
        else:
            vals["extrasource"] = self.extrasource
        vals["grammar"] = `self`
        # render the tables as indented python list literals
        actiontable_s = "[\n\t"
        for l in at:
            actiontable_s = "%s%s,\n\t" % (actiontable_s, `l`)
        vals["actiontable"] = actiontable_s[:-3] + "\n]\n\n"
        gototable_s = "[\n\t"
        for l in gt:
            gototable_s = "%s%s,\n\t" % (gototable_s, `l`)
        vals["gototable"] = gototable_s[:-3] + "\n]\n\n"
        pi_s = "[\n\t"
        pii = 0
        vals["symbols"] = `self.tokens`
        # NOTE(review): prod2func_s is built but never used
        prod2func_s = "Production" + " " * 45 + "Method Name\n"
        for l, f, e in pi:
            pi_s = "%s(%d, '%s', %d),%s# %s\n\t" % (pi_s,
                                                    l,
                                                    self.productions[pii].funcname,
                                                    e,
                                                    " " * (18 - len(self.productions[pii].funcname)),
                                                    `self.productions[pii]` )
            pii = pii + 1
        vals["prodinfo"] = pi_s + "]\n\n"
        fp = open(filename, "w")
        fp.write(template % vals)
        fp.close()
def _makeprod(x):
if len(x)==3: return Production(x[0],x[1],x[2])
if len(x)==2: return Production(x[0],x[1])
raise AttributeError, "Invalid Production initializer"
def _bootstrap():
    """Hand-build the grammar-specification grammar and write
    Parsers/GrammarParser.py from it (the grammar parser cannot parse
    its own specification until it exists)."""
    # dang, how did Scott bootstrap the GrammarParser??
    # have to make this by hand
    import Lexers
    # define the productions
    toks = Lexers.GrammarLex().getTokenList()
    # each spec is (lhs, rhs[, action name]); RHS entries are either
    # nonterminal names (strings) or terminal token indices
    prods = map(_makeprod,
                [("pspec", ["gspec"]),
                 ("pspec", ["pydefs", "gspec"]),
                 ("gspec", [toks.index("GDEL"), "lhsdeflist", toks.index("GDEL")]),
                 ("pydefs", ["pydefs", "pydef"]),
                 ("pydefs", ["pydef"]),
                 ("pydef", [toks.index("LEX")], "lexdef"),
                 ("pydef", [toks.index("CODE")], "addcode"),
                 ("pydef", [toks.index("CLASS")], "classname"),
                 ("lhsdeflist", ["lhsdeflist", "lhsdef"]),
                 ("lhsdeflist", ["lhsdef"]),
                 ("lhsdef", [toks.index("ID"), toks.index("COLON"), "rhslist", toks.index("SCOLON")], "lhsdef"),
                 ("rhslist", ["rhs"], "singletolist"),
                 ("rhslist", ["rhslist", toks.index("OR"), "rhs"], "rhslist_OR_rhs"),
                 ("rhs", ["rhsidlist"], "rhs_idlist"),
                 ("rhs", ["rhsidlist", toks.index("LPAREN"), toks.index("ID"), toks.index("RPAREN")], "rhs_idlist_func"),
                 ("rhsidlist", ["idlist"]),
                 ("rhsidlist", [], "rhseps"),
                 ("idlist", ["idlist", toks.index("ID")], "idl_idlistID"),
                 ("idlist", [toks.index("ID")], "idlistID")])
    print string.join(map(lambda x: str(x), prods), "\n")
    g = LALRGrammar(prods, toks)
    # g.extrasources = "import PyLR.Parsers"
    # produce the parser
    g.writefile("./Parsers/GrammarParser.py", "GrammarParser", "PyLR.Lexers.GrammarLex()")
if __name__=='__main__':
    # regenerate the grammar parser when run as a script
    _bootstrap()

77
PyLR/Lexer.py Normal file
View file

@ -0,0 +1,77 @@
import re, string, StringUtil
__version__ = "$Id$"
class PyLRSyntaxError(SyntaxError):
    """Raised by Lexer.scan when no token pattern matches the input."""
    pass
SKIPTOK = 0x01 # flag: matched text (e.g. whitespace, comments) is consumed but never handed to the grammar
class Lexer:
    """
    This is a lexer class for PyLR.
    Upon matching text, it must execute a function which will cause it
    to return a 2-tuple of type (tok, val) where token is an integer and
    val is just any python object that will later be passed as an argument
    to the functions that the parser will call when it reduces. For Example
    for the grammar
    E-> E + T
    E -> T
    T -> T * F
    T -> F
    F ->( E )
    F -> id
    it is likely that the lexer should return the token value of id <tok> and
    the integer value of id (string.atoi(id)).
    In addition, the lexer must always return (eof, something else) when it's done
    scanning to get the parser to continue to be called until parsing is done.
    """
    def __init__(self):
        # slot 0 is the reserved EOF pseudo-token; scan() returns token
        # index 0 once the input is exhausted
        self.toklist = [("EOF", None, None, 0)]
        self.settext("")
    def settext(self, t):
        """Set the text to scan and rewind to its start."""
        self.text = t
        self.rewind()
    def getTokenList(self):
        """return list of token names (index in this list == token number)"""
        return map(lambda x: x[0], self.toklist)
    def rewind(self):
        """Reset the scan position to the beginning of the text."""
        self.textindex = 0
    def addpat(self, pat, tokname=None, func=None, flags=0):
        """add search pattern to the lexer; func (if given) converts the
        match object into the token's value, flags may include SKIPTOK"""
        self.toklist.append((tokname, re.compile(pat), func, flags))
    def __str__(self):
        return string.join(map(lambda x: str(x[0])+": "+str(x[1]), self.toklist), "\n")
    def scan(self, verbose=0):
        """Return the next (token index, value) pair, or (0, "EOF") at
        end of input; raises PyLRSyntaxError when nothing matches.
        Patterns are tried in the order they were added.

        NOTE(review): SKIPTOK matches recurse rather than loop, so a
        very long run of skippable text could hit the recursion limit.
        """
        if self.textindex >= len(self.text):
            if verbose: print "EOF"
            return (0, "EOF")
        for i in range(1,len(self.toklist)):
            tok = self.toklist[i]
            mo = tok[1].match(self.text, self.textindex)
            if mo is None: # could be the empty string
                continue
            self.textindex = self.textindex + len(mo.group(0))
            if tok[3] & SKIPTOK:
                # skipped token (whitespace/comment): scan again
                return self.scan(verbose)
            else:
                if tok[2]:
                    val = apply(tok[2], (mo,))
                else:
                    val = mo.group(0)
                if verbose: print str(i)+", "+str(val)
                return (i, val)
        raise PyLRSyntaxError, "line "+\
              `StringUtil.getLineNumber(self.text, self.textindex)`+\
              ", near \""+self.text[self.textindex:self.textindex + 10]+"\""

31
PyLR/Lexers/GrammarLex.py Normal file
View file

@ -0,0 +1,31 @@
"""
this file contains the Lexer that is used in parsing Grammar specifications
"""
import re,Lexer
def retlex(mo):
    """Extract the lexer-spec text captured by the 'lex' group."""
    return mo.groupdict()["lex"]
def retcode(mo):
    """Extract the code text captured by the 'code' group."""
    return mo.groupdict()["code"]
def retclass(mo):
    """Extract the class name captured by the 'class' group."""
    return mo.groupdict()["class"]
class GrammarLex(Lexer.Lexer):
    """Lexer for PyLR grammar-specification files: _lex/_code/_class
    directives, identifiers, punctuation, and the triple-quote grammar
    delimiter; comments and whitespace are skipped."""
    def __init__(self):
        Lexer.Lexer.__init__(self)
        # directive patterns capture their payload via named groups
        self.addpat(r"_lex\s+(?P<lex>[^\n]*)", "LEX", retlex)
        self.addpat(r"_code\s+(?P<code>[^\n]*)", "CODE", retcode)
        self.addpat(r"_class\s+(?P<class>[a-zA-Z_][a-zA-Z_0-9]*)", "CLASS", retclass)
        self.addpat(r"[a-zA-Z_][a-zA-Z_0-9]*", "ID")
        self.addpat(r":", "COLON")
        self.addpat(r";", "SCOLON")
        self.addpat(r"\|", "OR")
        self.addpat(r"\(", "LPAREN")
        self.addpat(r"\)", "RPAREN")
        self.addpat(r'"""', "GDEL")
        # comments and whitespace are consumed without producing tokens
        self.addpat(r"\s*#[^\n]*", "", None, Lexer.SKIPTOK)
        self.addpat(r"\s+", "", None, Lexer.SKIPTOK)

5
PyLR/Lexers/__init__.py Normal file
View file

@ -0,0 +1,5 @@
from GrammarLex import GrammarLex

15
PyLR/Lexers/mathlex.py Normal file
View file

@ -0,0 +1,15 @@
import Lexer, re, string
def idfunc(m):
    """Semantic action for integer literals: matched text -> int."""
    return int(m.group(0), 10)
class mathlex(Lexer.Lexer):
    """Lexer for the arithmetic-expression demo grammar."""
    def __init__(self):
        Lexer.Lexer.__init__(self)
        # integer literals carry their numeric value (via idfunc);
        # the other tokens carry the matched text
        self.addpat(r"([1-9]([0-9]+)?)|0", "ID", idfunc)
        self.addpat(r"\+", "PLUS")
        self.addpat(r"\*","TIMES")
        self.addpat(r"\(", "LPAREN")
        self.addpat(r"\)", "RPAREN")
        self.addpat(r"\s+", "", None, Lexer.SKIPTOK)

319
PyLR/Makefile Normal file
View file

@ -0,0 +1,319 @@
# Generated automatically from Makefile.pre by makesetup.
# Generated automatically from Makefile.pre.in by sedscript.
# Universal Unix Makefile for Python extensions
# =============================================
# Short Instructions
# ------------------
# 1. Build and install Python (1.5 or newer).
# 2. "make -f Makefile.pre.in boot"
# 3. "make"
# You should now have a shared library.
# Long Instructions
# -----------------
# Build *and install* the basic Python 1.5 distribution. See the
# Python README for instructions. (This version of Makefile.pre.in
# only works with Python 1.5, alpha 3 or newer.)
# Create a file Setup.in for your extension. This file follows the
# format of the Modules/Setup.in file; see the instructions there.
# For a simple module called "spam" on file "spammodule.c", it can
# contain a single line:
# spam spammodule.c
# You can build as many modules as you want in the same directory --
# just have a separate line for each of them in the Setup.in file.
# If you want to build your extension as a shared library, insert a
# line containing just the string
# *shared*
# at the top of your Setup.in file.
# Note that the build process copies Setup.in to Setup, and then works
# with Setup. It doesn't overwrite Setup when Setup.in is changed, so
# while you're in the process of debugging your Setup.in file, you may
# want to edit Setup instead, and copy it back to Setup.in later.
# (All this is done so you can distribute your extension easily and
# someone else can select the modules they actually want to build by
# commenting out lines in the Setup file, without editing the
# original. Editing Setup is also used to specify nonstandard
# locations for include or library files.)
# Copy this file (Misc/Makefile.pre.in) to the directory containing
# your extension.
# Run "make -f Makefile.pre.in boot". This creates Makefile
# (producing Makefile.pre and sedscript as intermediate files) and
# config.c, incorporating the values for sys.prefix, sys.exec_prefix
# and sys.version from the installed Python binary. For this to work,
# the python binary must be on your path. If this fails, try
# make -f Makefile.pre.in Makefile VERSION=1.5 installdir=<prefix>
# where <prefix> is the prefix used to install Python for installdir
# (and possibly similar for exec_installdir=<exec_prefix>).
# Note: "make boot" implies "make clobber" -- it assumes that when you
# bootstrap you may have changed platforms so it removes all previous
# output files.
# If you are building your extension as a shared library (your
# Setup.in file starts with *shared*), run "make" or "make sharedmods"
# to build the shared library files. If you are building a statically
# linked Python binary (the only solution if your platform doesn't
# support shared libraries, and sometimes handy if you want to
# distribute or install the resulting Python binary), run "make
# python".
# Note: Each time you edit Makefile.pre.in or Setup, you must run
# "make Makefile" before running "make".
# Hint: if you want to use VPATH, you can start in an empty
# subdirectory and say (e.g.):
# make -f ../Makefile.pre.in boot srcdir=.. VPATH=..
# === Bootstrap variables (edited through "make boot") ===
# The prefix used by "make inclinstall libainstall" of core python
installdir= /usr
# The exec_prefix used by the same
exec_installdir=/usr
# Source directory and VPATH in case you want to use VPATH.
# (You will have to edit these two lines yourself -- there is no
# automatic support as the Makefile is not generated by
# config.status.)
srcdir= .
VPATH= .
# === Variables that you may want to customize (rarely) ===
# (Static) build target
TARGET= python
# Installed python binary (used only by boot target)
PYTHON= python
# Add more -I and -D options here
CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS)
# These two variables can be set in Setup to merge extensions.
# See example[23].
BASELIB=
BASESETUP=
# === Variables set by makesetup ===
MODOBJS=
MODLIBS= $(LOCALMODLIBS) $(BASEMODLIBS)
# === Definitions added by makesetup ===
LOCALMODLIBS=
BASEMODLIBS=
SHAREDMODS= PyLRenginemodule$(SO)
TKPATH=:lib-tk
GLHACK=-Dclear=__GLclear
PYTHONPATH=$(COREPYTHONPATH)
COREPYTHONPATH=$(DESTPATH)$(SITEPATH)$(MACHDEPPATH)$(STDWINPATH)$(TKPATH)
MACHDEPPATH=:plat-$(MACHDEP)
TESTPATH=
SITEPATH=
DESTPATH=
MACHDESTLIB=$(BINLIBDEST)
DESTLIB=$(LIBDEST)
# === Variables from configure (through sedscript) ===
VERSION= 1.5
CC= gcc
LINKCC= $(CC)
SGI_ABI= @SGI_ABI@
OPT= -g -O2
LDFLAGS=
DEFS= -DHAVE_CONFIG_H
LIBS= -lieee -ldl -lpthread
LIBM= -lm
LIBC=
RANLIB= ranlib
MACHDEP= linux2
SO= .so
LDSHARED= gcc -shared -lc
CCSHARED= -fPIC
LINKFORSHARED= -Xlinker -export-dynamic
CCC=g++
# Install prefix for architecture-independent files
prefix= /usr
# Install prefix for architecture-dependent files
exec_prefix= ${prefix}
# === Fixed definitions ===
# Shell used by make (some versions default to the login shell, which is bad)
SHELL= /bin/sh
# Expanded directories
BINDIR= $(exec_installdir)/bin
LIBDIR= $(exec_prefix)/lib
MANDIR= $(installdir)/man
INCLUDEDIR= $(installdir)/include
SCRIPTDIR= $(prefix)/lib
# Detailed destination directories
BINLIBDEST= $(LIBDIR)/python$(VERSION)
LIBDEST= $(SCRIPTDIR)/python$(VERSION)
INCLUDEPY= $(INCLUDEDIR)/python$(VERSION)
EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION)
LIBP= $(exec_installdir)/lib/python$(VERSION)
DESTSHARED= $(BINLIBDEST)/site-packages
LIBPL= $(LIBP)/config
PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a
MAKESETUP= $(LIBPL)/makesetup
MAKEFILE= $(LIBPL)/Makefile
CONFIGC= $(LIBPL)/config.c
CONFIGCIN= $(LIBPL)/config.c.in
SETUP= $(LIBPL)/Setup
SYSLIBS= $(LIBM) $(LIBC)
ADDOBJS= $(LIBPL)/python.o config.o
# Portable install script (configure doesn't always guess right)
INSTALL= $(LIBPL)/install-sh -c
# Shared libraries must be installed with executable mode on some systems;
# rather than figuring out exactly which, we always give them executable mode.
# Also, making them read-only seems to be a good idea...
INSTALL_SHARED= ${INSTALL} -m 555
# === Fixed rules ===
# Default target. This builds shared libraries only
default: sharedmods
# Build everything
all: static sharedmods
# Build shared libraries from our extension modules
sharedmods: $(SHAREDMODS)
# Build a static Python binary containing our extension modules
static: $(TARGET)
$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB)
$(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \
$(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \
-o $(TARGET)
install: sharedmods
if test ! -d $(DESTSHARED) ; then \
mkdir $(DESTSHARED) ; else true ; fi
-for i in X $(SHAREDMODS); do \
if test $$i != X; \
then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \
fi; \
done
# Build the library containing our extension modules
lib.a: $(MODOBJS)
-rm -f lib.a
ar cr lib.a $(MODOBJS)
-$(RANLIB) lib.a
# This runs makesetup *twice* to use the BASESETUP definition from Setup
config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP)
$(MAKESETUP) \
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
$(MAKE) -f Makefile do-it-again
# Internal target to run makesetup for the second time
do-it-again:
$(MAKESETUP) \
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
# Make config.o from the config.c created by makesetup
config.o: config.c
$(CC) $(CFLAGS) -c config.c
# Setup is copied from Setup.in *only* if it doesn't yet exist
Setup:
cp $(srcdir)/Setup.in Setup
# Make the intermediate Makefile.pre from Makefile.pre.in
Makefile.pre: Makefile.pre.in sedscript
sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre
# Shortcuts to make the sed arguments on one line
P=prefix
E=exec_prefix
H=Generated automatically from Makefile.pre.in by sedscript.
L=LINKFORSHARED
# Make the sed script used to create Makefile.pre from Makefile.pre.in
sedscript: $(MAKEFILE)
sed -n \
-e '1s/.*/1i\\/p' \
-e '2s%.*%# $H%p' \
-e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \
-e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \
-e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \
-e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \
-e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \
-e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \
-e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \
-e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \
-e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \
-e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \
-e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \
-e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \
-e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \
-e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \
-e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \
-e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \
-e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \
-e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \
$(MAKEFILE) >sedscript
echo "/^CCC=g++/d" >>sedscript
echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript
echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript
echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript
echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript
echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript
echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript
echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript
# Bootstrap target
boot: clobber
VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \
installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \
exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \
$(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \
VERSION=$$VERSION \
installdir=$$installdir \
exec_installdir=$$exec_installdir \
Makefile
# Handy target to remove intermediate files and backups
clean:
-rm -f *.o *~
# Handy target to remove everything that is easily regenerated
clobber: clean
-rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript
-rm -f *.so *.sl so_locations
# Handy target to remove everything you don't want to distribute
distclean: clobber
-rm -f Makefile Setup
# Rules appended by makedepend
PyLRenginemodule.o: $(srcdir)/PyLRenginemodule.c; $(CC) $(CCSHARED) $(CFLAGS) -c $(srcdir)/PyLRenginemodule.c
PyLRenginemodule$(SO): PyLRenginemodule.o; $(LDSHARED) PyLRenginemodule.o -o PyLRenginemodule$(SO)

298
PyLR/Makefile.pre Normal file
View file

@ -0,0 +1,298 @@
# Generated automatically from Makefile.pre.in by sedscript.
# Universal Unix Makefile for Python extensions
# =============================================
# Short Instructions
# ------------------
# 1. Build and install Python (1.5 or newer).
# 2. "make -f Makefile.pre.in boot"
# 3. "make"
# You should now have a shared library.
# Long Instructions
# -----------------
# Build *and install* the basic Python 1.5 distribution. See the
# Python README for instructions. (This version of Makefile.pre.in
# only works with Python 1.5, alpha 3 or newer.)
# Create a file Setup.in for your extension. This file follows the
# format of the Modules/Setup.in file; see the instructions there.
# For a simple module called "spam" on file "spammodule.c", it can
# contain a single line:
# spam spammodule.c
# You can build as many modules as you want in the same directory --
# just have a separate line for each of them in the Setup.in file.
# If you want to build your extension as a shared library, insert a
# line containing just the string
# *shared*
# at the top of your Setup.in file.
# Note that the build process copies Setup.in to Setup, and then works
# with Setup. It doesn't overwrite Setup when Setup.in is changed, so
# while you're in the process of debugging your Setup.in file, you may
# want to edit Setup instead, and copy it back to Setup.in later.
# (All this is done so you can distribute your extension easily and
# someone else can select the modules they actually want to build by
# commenting out lines in the Setup file, without editing the
# original. Editing Setup is also used to specify nonstandard
# locations for include or library files.)
# Copy this file (Misc/Makefile.pre.in) to the directory containing
# your extension.
# Run "make -f Makefile.pre.in boot". This creates Makefile
# (producing Makefile.pre and sedscript as intermediate files) and
# config.c, incorporating the values for sys.prefix, sys.exec_prefix
# and sys.version from the installed Python binary. For this to work,
# the python binary must be on your path. If this fails, try
# make -f Makefile.pre.in Makefile VERSION=1.5 installdir=<prefix>
# where <prefix> is the prefix used to install Python for installdir
# (and possibly similar for exec_installdir=<exec_prefix>).
# Note: "make boot" implies "make clobber" -- it assumes that when you
# bootstrap you may have changed platforms so it removes all previous
# output files.
# If you are building your extension as a shared library (your
# Setup.in file starts with *shared*), run "make" or "make sharedmods"
# to build the shared library files. If you are building a statically
# linked Python binary (the only solution if your platform doesn't
# support shared libraries, and sometimes handy if you want to
# distribute or install the resulting Python binary), run "make
# python".
# Note: Each time you edit Makefile.pre.in or Setup, you must run
# "make Makefile" before running "make".
# Hint: if you want to use VPATH, you can start in an empty
# subdirectory and say (e.g.):
# make -f ../Makefile.pre.in boot srcdir=.. VPATH=..
# NOTE(review): this file was generated from Makefile.pre.in by sedscript
# ("make boot"); prefer editing Makefile.pre.in and regenerating rather
# than editing this file by hand.
# === Bootstrap variables (edited through "make boot") ===
# The prefix used by "make inclinstall libainstall" of core python
installdir= /usr
# The exec_prefix used by the same
exec_installdir=/usr
# Source directory and VPATH in case you want to use VPATH.
# (You will have to edit these two lines yourself -- there is no
# automatic support as the Makefile is not generated by
# config.status.)
srcdir= .
VPATH= .
# === Variables that you may want to customize (rarely) ===
# (Static) build target
TARGET= python
# Installed python binary (used only by boot target)
PYTHON= python
# Add more -I and -D options here
CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS)
# These two variables can be set in Setup to merge extensions.
# See example[23].
BASELIB=
BASESETUP=
# === Variables set by makesetup ===
MODOBJS= _MODOBJS_
MODLIBS= _MODLIBS_
# === Definitions added by makesetup ===
# === Variables from configure (through sedscript) ===
VERSION= 1.5
CC= gcc
LINKCC= $(CC)
# NOTE(review): @SGI_ABI@ was left unsubstituted -- the sedscript rule set
# below has no entry for it; harmless on non-SGI platforms, confirm before
# building on IRIX.
SGI_ABI= @SGI_ABI@
OPT= -g -O2
LDFLAGS=
DEFS= -DHAVE_CONFIG_H
LIBS= -lieee -ldl -lpthread
LIBM= -lm
LIBC=
RANLIB= ranlib
MACHDEP= linux2
SO= .so
LDSHARED= gcc -shared -lc
CCSHARED= -fPIC
LINKFORSHARED= -Xlinker -export-dynamic
CCC=g++
# Install prefix for architecture-independent files
prefix= /usr
# Install prefix for architecture-dependent files
exec_prefix= ${prefix}
# === Fixed definitions ===
# Shell used by make (some versions default to the login shell, which is bad)
SHELL= /bin/sh
# Expanded directories
BINDIR= $(exec_installdir)/bin
LIBDIR= $(exec_prefix)/lib
MANDIR= $(installdir)/man
INCLUDEDIR= $(installdir)/include
SCRIPTDIR= $(prefix)/lib
# Detailed destination directories
BINLIBDEST= $(LIBDIR)/python$(VERSION)
LIBDEST= $(SCRIPTDIR)/python$(VERSION)
INCLUDEPY= $(INCLUDEDIR)/python$(VERSION)
EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION)
LIBP= $(exec_installdir)/lib/python$(VERSION)
DESTSHARED= $(BINLIBDEST)/site-packages
LIBPL= $(LIBP)/config
PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a
MAKESETUP= $(LIBPL)/makesetup
MAKEFILE= $(LIBPL)/Makefile
CONFIGC= $(LIBPL)/config.c
CONFIGCIN= $(LIBPL)/config.c.in
SETUP= $(LIBPL)/Setup
SYSLIBS= $(LIBM) $(LIBC)
ADDOBJS= $(LIBPL)/python.o config.o
# Portable install script (configure doesn't always guess right)
INSTALL= $(LIBPL)/install-sh -c
# Shared libraries must be installed with executable mode on some systems;
# rather than figuring out exactly which, we always give them executable mode.
# Also, making them read-only seems to be a good idea...
INSTALL_SHARED= ${INSTALL} -m 555
# === Fixed rules ===
# Default target. This builds shared libraries only
default: sharedmods
# Build everything
all: static sharedmods
# Build shared libraries from our extension modules
sharedmods: $(SHAREDMODS)
# Build a static Python binary containing our extension modules
static: $(TARGET)
$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB)
	$(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \
	 $(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \
	 -o $(TARGET)
# Install the shared modules into Python's site-packages directory
install: sharedmods
	if test ! -d $(DESTSHARED) ; then \
		mkdir $(DESTSHARED) ; else true ; fi
	-for i in X $(SHAREDMODS); do \
		if test $$i != X; \
		then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \
		fi; \
	done
# Build the library containing our extension modules
lib.a: $(MODOBJS)
	-rm -f lib.a
	ar cr lib.a $(MODOBJS)
	-$(RANLIB) lib.a
# This runs makesetup *twice* to use the BASESETUP definition from Setup
config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP)
	$(MAKESETUP) \
	 -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
	$(MAKE) -f Makefile do-it-again
# Internal target to run makesetup for the second time
do-it-again:
	$(MAKESETUP) \
	 -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
# Make config.o from the config.c created by makesetup
config.o: config.c
	$(CC) $(CFLAGS) -c config.c
# Setup is copied from Setup.in *only* if it doesn't yet exist
Setup:
	cp $(srcdir)/Setup.in Setup
# Make the intermediate Makefile.pre from Makefile.pre.in
Makefile.pre: Makefile.pre.in sedscript
	sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre
# Shortcuts to make the sed arguments on one line
P=prefix
E=exec_prefix
H=Generated automatically from Makefile.pre.in by sedscript.
L=LINKFORSHARED
# Make the sed script used to create Makefile.pre from Makefile.pre.in
# (each -e emits one "s%@VAR@%value%" substitution derived from the
# installed Python's config Makefile)
sedscript: $(MAKEFILE)
	sed -n \
	 -e '1s/.*/1i\\/p' \
	 -e '2s%.*%# $H%p' \
	 -e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \
	 -e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \
	 -e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \
	 -e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \
	 -e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \
	 -e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \
	 -e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \
	 -e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \
	 -e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \
	 -e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \
	 -e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \
	 -e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \
	 -e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \
	 -e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \
	 -e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \
	 -e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \
	 -e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \
	 -e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \
	 $(MAKEFILE) >sedscript
	echo "/^CCC=g++/d" >>sedscript
	echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript
	echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript
	echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript
	echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript
	echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript
	echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript
	echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript
# Bootstrap target
# (queries the installed Python for version and prefixes, then re-runs
# make on Makefile.pre.in to regenerate this Makefile)
boot: clobber
	VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \
	installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \
	exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \
	$(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \
		VERSION=$$VERSION \
		installdir=$$installdir \
		exec_installdir=$$exec_installdir \
		Makefile
# Handy target to remove intermediate files and backups
clean:
	-rm -f *.o *~
# Handy target to remove everything that is easily regenerated
clobber: clean
	-rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript
	-rm -f *.so *.sl so_locations
# Handy target to remove everything you don't want to distribute
distclean: clobber
	-rm -f Makefile Setup

297
PyLR/Makefile.pre.in Normal file
View file

@ -0,0 +1,297 @@
# Universal Unix Makefile for Python extensions
# =============================================
# Short Instructions
# ------------------
# 1. Build and install Python (1.5 or newer).
# 2. "make -f Makefile.pre.in boot"
# 3. "make"
# You should now have a shared library.
# Long Instructions
# -----------------
# Build *and install* the basic Python 1.5 distribution. See the
# Python README for instructions. (This version of Makefile.pre.in
# only works with Python 1.5, alpha 3 or newer.)
# Create a file Setup.in for your extension. This file follows the
# format of the Modules/Setup.in file; see the instructions there.
# For a simple module called "spam" on file "spammodule.c", it can
# contain a single line:
# spam spammodule.c
# You can build as many modules as you want in the same directory --
# just have a separate line for each of them in the Setup.in file.
# If you want to build your extension as a shared library, insert a
# line containing just the string
# *shared*
# at the top of your Setup.in file.
# Note that the build process copies Setup.in to Setup, and then works
# with Setup. It doesn't overwrite Setup when Setup.in is changed, so
# while you're in the process of debugging your Setup.in file, you may
# want to edit Setup instead, and copy it back to Setup.in later.
# (All this is done so you can distribute your extension easily and
# someone else can select the modules they actually want to build by
# commenting out lines in the Setup file, without editing the
# original. Editing Setup is also used to specify nonstandard
# locations for include or library files.)
# Copy this file (Misc/Makefile.pre.in) to the directory containing
# your extension.
# Run "make -f Makefile.pre.in boot". This creates Makefile
# (producing Makefile.pre and sedscript as intermediate files) and
# config.c, incorporating the values for sys.prefix, sys.exec_prefix
# and sys.version from the installed Python binary. For this to work,
# the python binary must be on your path. If this fails, try
# make -f Makefile.pre.in Makefile VERSION=1.5 installdir=<prefix>
# where <prefix> is the prefix used to install Python for installdir
# (and possibly similar for exec_installdir=<exec_prefix>).
# Note: "make boot" implies "make clobber" -- it assumes that when you
# bootstrap you may have changed platforms so it removes all previous
# output files.
# If you are building your extension as a shared library (your
# Setup.in file starts with *shared*), run "make" or "make sharedmods"
# to build the shared library files. If you are building a statically
# linked Python binary (the only solution if your platform doesn't
# support shared libraries, and sometimes handy if you want to
# distribute or install the resulting Python binary), run "make
# python".
# Note: Each time you edit Makefile.pre.in or Setup, you must run
# "make Makefile" before running "make".
# Hint: if you want to use VPATH, you can start in an empty
# subdirectory and say (e.g.):
# make -f ../Makefile.pre.in boot srcdir=.. VPATH=..
# === Bootstrap variables (edited through "make boot") ===
# (the @VAR@ placeholders below are substituted by the generated
# sedscript; this file is the template from which Makefile.pre is made)
# The prefix used by "make inclinstall libainstall" of core python
installdir= /usr/local
# The exec_prefix used by the same
exec_installdir=$(installdir)
# Source directory and VPATH in case you want to use VPATH.
# (You will have to edit these two lines yourself -- there is no
# automatic support as the Makefile is not generated by
# config.status.)
srcdir= .
VPATH= .
# === Variables that you may want to customize (rarely) ===
# (Static) build target
TARGET= python
# Installed python binary (used only by boot target)
PYTHON= python
# Add more -I and -D options here
CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS)
# These two variables can be set in Setup to merge extensions.
# See example[23].
BASELIB=
BASESETUP=
# === Variables set by makesetup ===
MODOBJS= _MODOBJS_
MODLIBS= _MODLIBS_
# === Definitions added by makesetup ===
# === Variables from configure (through sedscript) ===
VERSION= @VERSION@
CC= @CC@
LINKCC= @LINKCC@
SGI_ABI= @SGI_ABI@
OPT= @OPT@
LDFLAGS= @LDFLAGS@
DEFS= @DEFS@
LIBS= @LIBS@
LIBM= @LIBM@
LIBC= @LIBC@
RANLIB= @RANLIB@
MACHDEP= @MACHDEP@
SO= @SO@
LDSHARED= @LDSHARED@
CCSHARED= @CCSHARED@
LINKFORSHARED= @LINKFORSHARED@
#@SET_CCC@
# Install prefix for architecture-independent files
prefix= /usr/local
# Install prefix for architecture-dependent files
exec_prefix= $(prefix)
# === Fixed definitions ===
# Shell used by make (some versions default to the login shell, which is bad)
SHELL= /bin/sh
# Expanded directories
BINDIR= $(exec_installdir)/bin
LIBDIR= $(exec_prefix)/lib
MANDIR= $(installdir)/man
INCLUDEDIR= $(installdir)/include
SCRIPTDIR= $(prefix)/lib
# Detailed destination directories
BINLIBDEST= $(LIBDIR)/python$(VERSION)
LIBDEST= $(SCRIPTDIR)/python$(VERSION)
INCLUDEPY= $(INCLUDEDIR)/python$(VERSION)
EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION)
LIBP= $(exec_installdir)/lib/python$(VERSION)
DESTSHARED= $(BINLIBDEST)/site-packages
LIBPL= $(LIBP)/config
PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a
MAKESETUP= $(LIBPL)/makesetup
MAKEFILE= $(LIBPL)/Makefile
CONFIGC= $(LIBPL)/config.c
CONFIGCIN= $(LIBPL)/config.c.in
SETUP= $(LIBPL)/Setup
SYSLIBS= $(LIBM) $(LIBC)
ADDOBJS= $(LIBPL)/python.o config.o
# Portable install script (configure doesn't always guess right)
INSTALL= $(LIBPL)/install-sh -c
# Shared libraries must be installed with executable mode on some systems;
# rather than figuring out exactly which, we always give them executable mode.
# Also, making them read-only seems to be a good idea...
INSTALL_SHARED= ${INSTALL} -m 555
# === Fixed rules ===
# Default target. This builds shared libraries only
default: sharedmods
# Build everything
all: static sharedmods
# Build shared libraries from our extension modules
sharedmods: $(SHAREDMODS)
# Build a static Python binary containing our extension modules
static: $(TARGET)
$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB)
	$(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \
	 $(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \
	 -o $(TARGET)
# Install the shared modules into Python's site-packages directory
install: sharedmods
	if test ! -d $(DESTSHARED) ; then \
		mkdir $(DESTSHARED) ; else true ; fi
	-for i in X $(SHAREDMODS); do \
		if test $$i != X; \
		then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \
		fi; \
	done
# Build the library containing our extension modules
lib.a: $(MODOBJS)
	-rm -f lib.a
	ar cr lib.a $(MODOBJS)
	-$(RANLIB) lib.a
# This runs makesetup *twice* to use the BASESETUP definition from Setup
config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP)
	$(MAKESETUP) \
	 -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
	$(MAKE) -f Makefile do-it-again
# Internal target to run makesetup for the second time
do-it-again:
	$(MAKESETUP) \
	 -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
# Make config.o from the config.c created by makesetup
config.o: config.c
	$(CC) $(CFLAGS) -c config.c
# Setup is copied from Setup.in *only* if it doesn't yet exist
Setup:
	cp $(srcdir)/Setup.in Setup
# Make the intermediate Makefile.pre from Makefile.pre.in
Makefile.pre: Makefile.pre.in sedscript
	sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre
# Shortcuts to make the sed arguments on one line
P=prefix
E=exec_prefix
H=Generated automatically from Makefile.pre.in by sedscript.
L=LINKFORSHARED
# Make the sed script used to create Makefile.pre from Makefile.pre.in
# (each -e emits one "s%@VAR@%value%" substitution derived from the
# installed Python's config Makefile)
sedscript: $(MAKEFILE)
	sed -n \
	 -e '1s/.*/1i\\/p' \
	 -e '2s%.*%# $H%p' \
	 -e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \
	 -e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \
	 -e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \
	 -e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \
	 -e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \
	 -e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \
	 -e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \
	 -e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \
	 -e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \
	 -e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \
	 -e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \
	 -e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \
	 -e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \
	 -e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \
	 -e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \
	 -e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \
	 -e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \
	 -e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \
	 $(MAKEFILE) >sedscript
	echo "/^#@SET_CCC@/d" >>sedscript
	echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript
	echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript
	echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript
	echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript
	echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript
	echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript
	echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript
# Bootstrap target
boot: clobber
	VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \
	installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \
	exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \
	$(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \
		VERSION=$$VERSION \
		installdir=$$installdir \
		exec_installdir=$$exec_installdir \
		Makefile
# Handy target to remove intermediate files and backups
clean:
	-rm -f *.o *~
# Handy target to remove everything that is easily regenerated
clobber: clean
	-rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript
	-rm -f *.so *.sl so_locations
# Handy target to remove everything you don't want to distribute
distclean: clobber
	-rm -f Makefile Setup

45
PyLR/Parser.py Normal file
View file

@ -0,0 +1,45 @@
__version__ = "$Id$"
import PyLRengine
class Parser:
    """Table-driven LR parser shell.

    Couples a lexer with generated action/goto tables and the C parsing
    engine (PyLRengine).  Subclasses provide one action method per
    grammar production; `prodinfo` names those methods and they are
    resolved to bound methods at construction time.
    """

    def __init__(self, lexer, actiontable, gototable, prodinfo):
        """Store the tables and resolve production action names.

        prodinfo -- sequence of (length, method_name, lhs_index) tuples.
        Raises AttributeError (after writing a diagnostic to stderr) if
        a named action method is missing from the (sub)class.
        """
        self.lexer = lexer
        self.actions = actiontable
        self.gotos = gototable
        # Resolve each production's action-method *name* into the bound
        # method, eagerly, so a missing method fails here at construction
        # time rather than deep inside a parse.  (The original used
        # map(lambda ...), which on modern Python is lazy and therefore
        # neither indexable nor checked up front.)
        try:
            resolved = []
            for info in prodinfo:
                resolved.append((info[0], getattr(self, info[1]), info[2]))
            self.prodinfo = resolved
        except AttributeError:
            # Local import: `sys` was previously referenced without being
            # imported, so this path raised NameError and masked the
            # intended diagnostic.
            import sys
            sys.stderr.write("Parser: error: forgot to supply a parser function\n")
            raise
        self.engine = None

    # the unspecified function (the default for all productions)
    def unspecified(*args):
        """Default production action: return the first production value.

        Deliberately declared without an explicit `self`; when invoked as
        a bound method, args[0] is the instance and args[1] the first
        value of the production.
        """
        return args[1]

    def initengine(self, dodel=0):
        """Create the PyLRengine; with dodel true, drop the (now copied)
        tables to free memory."""
        self.engine = PyLRengine.NewEngine(self.prodinfo, self.actions, self.gotos)
        if dodel:
            self.actions = []
            self.gotos = []
            self.prodinfo = []

    def parse(self, text, verbose=0):
        """Scan `text` token by token and feed the engine until it stops.

        Currently always returns None -- the engine does not yet expose
        the final semantic value (see comment below).
        """
        self.initengine()
        self.lexer.settext(text)
        while 1:
            tok, val = self.lexer.scan(verbose)
            if not self.engine.parse(tok, val, verbose):
                break
        # need to add a method to the engine to
        # return the final value
        # and return that here
        return None

View file

@ -0,0 +1,169 @@
"""
./Parsers/GrammarParser.py -- created Wed Feb 23 15:23:44 2000
This file was automatically generated by the PyLR parser generator.
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
tables are used to give functionality to a parsing engine. It also defines
a Parser class called GrammarParser which will use this engine.  Its usage
is indicated in GrammarParser's doc-string.
"""
#
# this section contains source code added by the user
# plus 'import PyLR'
#
import PyLR
#
# the action table ('s', 4) means shift to state 4,
# ('r', 4) means reduce by production number 4
# other entries are errors. each row represents a state
# and each column a terminal lookahead symbol (excluding symbols with
# Lexer.SKIPTOK).
# Lexer symbols are:
# ['EOF', 'LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', '', '']
#
# (generated data -- do not edit by hand)
# _actiontable[state][terminal] -> (action, arg):
#   ('s', n) shift to state n; ('r', n) reduce by production n;
#   ('a', -1) accept; ('', -1) error.
_actiontable = [
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)],
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)],
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)],
[('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)],
[('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)],
[('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)],
[('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)],
[('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)]
]
#
# the goto table, each row represents a state
# and each column, the nonterminal that was on the lhs of the
# reduction
#
# (generated data -- do not edit by hand)
# _gototable[state][nonterminal_index] -> next state after a reduction
# whose lhs is that nonterminal, or None if no transition exists.
_gototable = [
[1, 2, 3, 9, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, 4, None, 8, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, 6, 14, None, None, None, None],
[None, None, None, None, None, 13, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, 17, 19, 22, 26],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, 21, 22, 26],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None]
]
#
# This is the prodinfo table. each row represents a production
# the entries are the length of the production, the name of a method
# in an instance of the GrammarParser class below that gets called
# when that production occurs, and the index of the lhs in the
# nonterminals (as in # the gototable)
#
# (generated data -- do not edit by hand)
# One entry per production: (rhs length, action-method name on the
# GrammarParser subclass, lhs nonterminal index into _gototable columns).
_prodinfo = [
(1, 'unspecified', 0), # pspec: gspec (unspecified)
(2, 'unspecified', 0), # pspec: pydefs gspec (unspecified)
(3, 'unspecified', 1), # gspec: 10 lhsdeflist 10 (unspecified)
(2, 'unspecified', 2), # pydefs: pydefs pydef (unspecified)
(1, 'unspecified', 2), # pydefs: pydef (unspecified)
(1, 'lexdef', 3), # pydef: 1 (lexdef)
(1, 'addcode', 3), # pydef: 2 (addcode)
(1, 'classname', 3), # pydef: 3 (classname)
(2, 'unspecified', 4), # lhsdeflist: lhsdeflist lhsdef (unspecified)
(1, 'unspecified', 4), # lhsdeflist: lhsdef (unspecified)
(4, 'lhsdef', 5), # lhsdef: 4 5 rhslist 6 (lhsdef)
(1, 'singletolist', 6), # rhslist: rhs (singletolist)
(3, 'rhslist_OR_rhs', 6), # rhslist: rhslist 7 rhs (rhslist_OR_rhs)
(1, 'rhs_idlist', 7), # rhs: rhsidlist (rhs_idlist)
(4, 'rhs_idlist_func', 7), # rhs: rhsidlist 8 4 9 (rhs_idlist_func)
(1, 'unspecified', 8), # rhsidlist: idlist (unspecified)
(0, 'rhseps', 8), # rhsidlist: (rhseps)
(2, 'idl_idlistID', 9), # idlist: idlist 4 (idl_idlistID)
(1, 'idlistID', 9), # idlist: 4 (idlistID)
]
class GrammarParser(PyLR.Parser.Parser):
    """
    this class was produced automatically by the PyLR parser generator.
    It is meant to be subclassed to produce a parser for the grammar

    pspec: gspec (unspecified);
    pspec: pydefs gspec (unspecified);
    gspec: GDEL lhsdeflist GDEL (unspecified);
    pydefs: pydefs pydef (unspecified);
    pydefs: pydef (unspecified);
    pydef: LEX (lexdef);
    pydef: CODE (addcode);
    pydef: CLASS (classname);
    lhsdeflist: lhsdeflist lhsdef (unspecified);
    lhsdeflist: lhsdef (unspecified);
    lhsdef: ID COLON rhslist SCOLON (lhsdef);
    rhslist: rhs (singletolist);
    rhslist: rhslist OR rhs (rhslist_OR_rhs);
    rhs: rhsidlist (rhs_idlist);
    rhs: rhsidlist LPAREN ID RPAREN (rhs_idlist_func);
    rhsidlist: idlist (unspecified);
    rhsidlist: (rhseps);
    idlist: idlist ID (idl_idlistID);
    idlist: ID (idlistID);

    While parsing input, if one of the above productions is recognized,
    a method of your sub-class (whose name is indicated in parens to the
    right) will be invoked. Names marked 'unspecified' should be ignored.

    usage:
    class MyGrammarParser(GrammarParser):
        # ...define the methods for the productions...
    p = MyGrammarParser(); p.parse(text)
    """
    def __init__(self):
        # Pair the grammar lexer with the generated LR tables defined
        # above; the base Parser resolves the action-method names in
        # _prodinfo against this (sub)class instance via getattr.
        lexer = PyLR.Lexers.GrammarLex()
        PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo)

7
PyLR/Parsers/__init__.py Normal file
View file

@ -0,0 +1,7 @@
"""if you want to make parsers available from this package directly,
that is, if you want 'from PyLR.Parsers import RandomParser' to
work, import the name here
"""
# Re-export the generated GrammarParser at package level.
# NOTE(review): implicit relative import (Python 1.x/2.x style); under
# Python 3 this would need "from .GrammarParser import GrammarParser".
from GrammarParser import GrammarParser

170
PyLR/Parsers/gram.py Normal file
View file

@ -0,0 +1,170 @@
"""
out -- created Tue Dec 16 00:30:36 1997
This file was automatically generated by the PyLR parser generator.
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
tables are used to give functionality to a parsing engine. It also defines
a Parser class called GrammarParser which will use this engine.  Its usage is
indicated in GrammarParser's doc-string.
"""
#
# this section contains source code added by the user
# plus 'import PyLR'
#
import PyLR.Lexers
import PyLR.Parser
import PyLR
#
# the action table ('s', 4) means shift to state 4,
# ('r', 4) means reduce by production number 4
# other entries are errors. each row represents a state
# and each column a terminal lookahead symbol (plus EOF)
# these symbols are ['LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', 'EOF']
#
# Generated LR action table: one row per parser state, one column per
# terminal lookahead (see the symbol list in the comment above).
# ('s', n) = shift to state n; ('r', n) = reduce by production n;
# ('a', -1) = accept; ('', -1) = syntax error.  Do not edit by hand.
_actiontable = [
    [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)],
    [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)],
    [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)],
    [('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)],
    [('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)],
    [('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)],
    [('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)],
    [('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)],
    [('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)]
]
#
# the goto table, each row represents a state
# and each column, the nonterminal that was on the lhs of the
# reduction
#
# Generated LR goto table: one row per state, one column per nonterminal.
# Entry is the state to enter after reducing to that nonterminal, or
# None where no goto exists.  Do not edit by hand.
_gototable = [
    [1, 2, 3, 9, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, 4, None, 8, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, 6, 14, None, None, None, None],
    [None, None, None, None, None, 13, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, 17, 19, 22, 26],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, 21, 22, 26],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None],
    [None, None, None, None, None, None, None, None, None, None]
]
#
# This is the prodinfo table. each row represents a production
# the entries are the length of the production, the name of a method
# in an instance of the GrammarParser class below that gets called
# when that production occurs, and the index of the lhs in the
# nonterminals (as in # the gototable)
#
# Generated production table: one row per grammar production, giving
# (rhs length, reduce-handler method name, lhs column in _gototable).
# Handlers named 'unspecified' are ignored by subclasses.
_prodinfo = [
    (1, 'unspecified', 0), # pspec -> ['gspec']
    (2, 'unspecified', 0), # pspec -> ['pydefs', 'gspec']
    (3, 'unspecified', 1), # gspec -> ['GDEL', 'lhsdeflist', 'GDEL']
    (2, 'unspecified', 2), # pydefs -> ['pydefs', 'pydef']
    (1, 'unspecified', 2), # pydefs -> ['pydef']
    (1, 'lexdef', 3), # pydef -> ['LEX']
    (1, 'addcode', 3), # pydef -> ['CODE']
    (1, 'classname', 3), # pydef -> ['CLASS']
    (2, 'unspecified', 4), # lhsdeflist -> ['lhsdeflist', 'lhsdef']
    (1, 'unspecified', 4), # lhsdeflist -> ['lhsdef']
    (4, 'lhsdef', 5), # lhsdef -> ['ID', 'COLON', 'rhslist', 'SCOLON']
    (1, 'singletolist', 6), # rhslist -> ['rhs']
    (3, 'rhslist_OR_rhs', 6), # rhslist -> ['rhslist', 'OR', 'rhs']
    (1, 'rhs_idlist', 7), # rhs -> ['rhsidlist']
    (4, 'rhs_idlist_func', 7), # rhs -> ['rhsidlist', 'LPAREN', 'ID', 'RPAREN']
    (1, 'unspecified', 8), # rhsidlist -> ['idlist']
    (0, 'rhseps', 8), # rhsidlist -> []
    (2, 'idl_idlistID', 9), # idlist -> ['idlist', 'ID']
    (1, 'idlistID', 9), # idlist -> ['ID']
]
class GrammarParser (PyLR.Parser.Parser):
    """
    this class was produced automatically by the PyLR parser generator.
    It is meant to be subclassed to produce a parser for the grammar
    pspec -> gspec (unspecified)
          | pydefs gspec; (unspecified)
    gspec -> GDEL lhsdeflist GDEL; (unspecified)
    pydefs -> pydefs pydef (unspecified)
          | pydef; (unspecified)
    pydef -> LEX (lexdef)
          | CODE (addcode)
          | CLASS; (classname)
    lhsdeflist -> lhsdeflist lhsdef (unspecified)
          | lhsdef; (unspecified)
    lhsdef -> ID COLON rhslist SCOLON; (lhsdef)
    rhslist -> rhs (singletolist)
          | rhslist OR rhs; (rhslist_OR_rhs)
    rhs -> rhsidlist (rhs_idlist)
          | rhsidlist LPAREN ID RPAREN; (rhs_idlist_func)
    rhsidlist -> idlist (unspecified)
          | ; (rhseps)
    idlist -> idlist ID (idl_idlistID)
          | ID; (idlistID)
    While parsing input, if one of the above productions is recognized,
    a method of your sub-class (whose name is indicated in parens to the
    right) will be invoked. Names marked 'unspecified' should be ignored.
    usage:
        class MyGrammarParser(GrammarParser):
            # ...define the methods for the productions...
        p = MyGrammarParser(); p.parse(text)
    """
    def __init__(self):
        # Plug the generated grammar lexer and the module-level LR
        # tables into the generic PyLR parser engine.
        lexer = PyLR.Lexers.GrammarLex()
        PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo)

81
PyLR/PyLRengine.h Normal file
View file

@ -0,0 +1,81 @@
#ifndef Py_PYLRENGINE_H
#define Py_PYLRENGINE_H

#ifdef __cplusplus
extern "C" {
#endif

/* Sentinel token value marking an empty/unused input-buffer slot. */
#define EOBUF -1

/* One lexed token: integer token id plus its Python semantic value. */
struct inbufdatum {
    PyObject* pylrval;
    int tok;
};

/* A fixed-size chunk of input tokens, linked into a chunk list. */
struct inbufdata {
    struct inbufdatum** chunk;   /* array of 'chunksize' datum pointers */
    struct inbufdata* next;
};

/* The parser's input buffer: a chunked FIFO of tokens. */
typedef struct inbuf_struct {
    struct inbufdata* data;      /* head chunk (oldest unread tokens) */
    int bi;                      /* read index (monotonically increasing) */
    int nextinput;               /* write index (monotonically increasing) */
    int chunksize;               /* tokens per chunk */
} inbuftype;

/* One LR stack entry: automaton state, token id and semantic value. */
struct stackdatum {
    int state;
    int tok;
    PyObject* pylrval;
};

/* A fixed-size chunk of stack entries; chunks are linked newest-first. */
struct stackdata {
    struct stackdatum** bucket;  /* array of 'chunksize' entry pointers */
    struct stackdata* next;
};

/* The LR parse stack, growing through linked chunks. */
typedef struct stack_struct {
    struct stackdata* data;      /* chunk holding the top of the stack */
    int si;                      /* number of entries currently pushed */
    int chunksize;
} stacktype;

/* Per-production info: rhs length, Python reduce callback, lhs index. */
typedef struct prodinfo_struct {
    int len;
    PyObject* func;
    int lhsi;                    /* column of the lhs in the goto table */
} prodinfo_type;

/* One action-table cell: action code ('s'/'r'/'a') plus its argument. */
typedef struct actionstruct {
    int arg;
    short act;
} actiontype;

/***********************************************************************
 * the possible values of the action table
 ***********************************************************************/
#define SHIFT 's'
#define REDUCE 'r'
#define ACCEPT 'a'

/* The Python-visible parser object wrapping buffer, stack and tables. */
typedef struct {
    PyObject_HEAD
    inbuftype* inbuf;
    stacktype* stack;
    prodinfo_type** prodinfo;
    int prodinfosize;            /* number of productions */
    int** gototable;
    int goto_x;                  /* goto table dimensions */
    int goto_y;
    actiontype*** actiontable;
    int act_x;                   /* action table dimensions */
    int act_y;
    int toksadded;               /* tokens buffered so far (lookahead warm-up) */
} parserobject;

#ifdef __cplusplus
}
#endif

#endif /* !Py_PYLRENGINE_H */

717
PyLR/PyLRenginemodule.c Normal file
View file

@ -0,0 +1,717 @@
/***********************************************************************
* This file defines an ParseEngine (LR), It references a Parsing table
* that is defined in python.
*
* This defines a new type object in Python, called a Parser. It has
* 3 methods, .parse(int: token, char *: text),
* of them). .setaction(production), and .getaction(production).
*
* $Id$
*
***********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "Python.h"
#include "PyLRengine.h"
/***********************************************************************
* PyLRengine Error things
***********************************************************************/
/* The module's exception object (created in module init). */
static PyObject* PyLRParseError;

/* BUGFIX: the rest of this file reports errors through the name
 * PyLRengineError, which was never declared anywhere; alias it to the
 * declared exception object so every error site resolves to the same
 * object. */
#define PyLRengineError PyLRParseError

/* Allocate a dummy object and bail out on failure.
 * NOTE(review): malloc'ing a PyObject is never correct for the Python
 * API; this macro is kept only for source compatibility and should be
 * retired together with its callers. */
#define CHECK_MALLOC(obj) \
if (!(obj = (PyObject *) malloc (sizeof(PyObject)))) { \
    PyErr_SetString(PyExc_MemoryError, "no more memory"); \
    return NULL; \
}

/* Set the module exception and return NULL from the current function.
 * BUGFIX: previously raised the nonexistent PyExc_ParseError. */
#define onError(message) \
{ PyErr_SetString(PyLRengineError, message); return NULL; }
/***********************************************************************
* The engines input buffer. has a chunksize controllable from within
* python. functions are initinbufdata, init_inbuf, incbi, add2buf,
* gettoken, petpylrval, dumpinbuf
***********************************************************************/
/*
 * Allocate one input-buffer chunk with 'chunksize' empty slots, each
 * pre-set to {tok = EOBUF, pylrval = NULL}.
 * NOTE(review): error handling is inconsistent -- the first two
 * allocation failures exit() the process, while the third uses
 * onError (which returns NULL); these should presumably agree.
 */
static struct inbufdata * init_inbufdata(chunksize)
int chunksize;
{
    struct inbufdata * ibd;
    int i;

    if ((ibd = (struct inbufdata *) malloc(sizeof(struct inbufdata))) == NULL) {
        printf("No more Memory!\n");
        exit(1);
    }
    if ((ibd->chunk = (struct inbufdatum **) malloc(sizeof(struct inbufdatum *) * chunksize)) == NULL) {
        printf("No more Memory!\n");
        exit(1);
    }
    for (i=0; i<chunksize; i++) {
        if ((ibd->chunk[i] = (struct inbufdatum *) malloc(sizeof(struct inbufdatum))) == NULL) {
            onError("Memory");
        }
        ibd->chunk[i]->tok = EOBUF;
        ibd->chunk[i]->pylrval = NULL;
    }
    ibd->next = NULL;
    return ibd;
}
/* Create an empty input buffer whose chunks hold 'chunksize' tokens. */
static inbuftype * init_inbuf(chunksize)
int chunksize;
{
    inbuftype * buffer;

    buffer = (inbuftype *) malloc(sizeof(inbuftype));
    if (buffer == NULL) {
        printf("No more Memory!\n");
        exit(1);
    }
    buffer->data = init_inbufdata(chunksize);
    buffer->chunksize = chunksize;
    buffer->bi = 0;
    buffer->nextinput = 0;
    return buffer;
}
/*
 * Advance the buffer's read index by one.  When the increment crosses
 * a chunk boundary, the fully-consumed head chunk is unlinked and
 * freed.
 * NOTE(review): only the chunk struct itself is freed here -- the
 * datum array and its elements appear to leak; confirm intent.
 */
static void incbi(inbuf)
inbuftype * inbuf;
{
    struct inbufdata * tmpdata;

    if ((! ((inbuf->bi + 1) % inbuf->chunksize)) && (inbuf->bi != 0)) {
        tmpdata = inbuf->data->next;
        free(inbuf->data);
        inbuf->data = tmpdata;
    }
    inbuf->bi++;
}
/*
 * Append one (token, value) pair at the buffer's write index.  Walks
 * to the tail chunk, allocating a fresh chunk first whenever the write
 * index has just filled the current tail; the head pointer is saved
 * and restored around the walk.  The caller (parser_parse) has already
 * INCREF'd pylrval, and the buffer keeps that reference.
 */
static void add2buf(inbuf, tok, pylrval)
inbuftype * inbuf; int tok; PyObject * pylrval;
{
    struct inbufdata * orgibd = inbuf->data;
    struct inbufdata * newibd;

    while(inbuf->data->next != NULL)
        inbuf->data = inbuf->data->next;
    if ((! (inbuf->nextinput % inbuf->chunksize)) && (inbuf->nextinput != 0)) { /* make new chunk at end */
        newibd = init_inbufdata(inbuf->chunksize);
        newibd->chunk[0]->tok = tok;
        newibd->chunk[0]->pylrval = pylrval;
        inbuf->data->next = newibd;
    } else {
        inbuf->data->chunk[(inbuf->nextinput % inbuf->chunksize)]->tok = tok;
        inbuf->data->chunk[(inbuf->nextinput % inbuf->chunksize)]->pylrval = pylrval;
    }
    inbuf->nextinput++;
    inbuf->data = orgibd;
}
/* Read the token id / semantic value at the buffer's current read index. */
#define gettoken(ib) ((ib)->data->chunk[ (ib)->bi % (ib)->chunksize]->tok)
#define getpylrval(ib) ((ib)->data->chunk[ (ib)->bi % (ib)->chunksize]->pylrval)
/*
 * Debug helper: print every chunk and slot of the input buffer to
 * stdout.  Temporarily walks inbuf->data and restores it afterwards.
 */
static void dumpinbuf(inbuf)
inbuftype* inbuf;
{
    int i, j;
    struct inbufdata * orgibd = inbuf->data;

    printf ("inbuf at %p with bi at %d and chunksize of %d and nextinput at %d:\n", inbuf, \
            inbuf->bi, inbuf->chunksize, inbuf->nextinput);
    j = 0;
    for (inbuf->data; inbuf->data != NULL; inbuf->data = inbuf->data->next) {
        printf("\tchunk %d:\n", j);
        for (i=0; i < inbuf->chunksize; i++) {
            printf("\t\tchunk[%d]->tok = %d; pylrval at %p\n",
                   i,
                   inbuf->data->chunk[i]->tok,
                   inbuf->data->chunk[i]->pylrval);
        }
        j++;
    }
    inbuf->data = orgibd;
}
/***********************************************************************
* the Stack
***********************************************************************/
/*
 * Allocate an empty parse stack; entries will live in chunks of
 * 'stackchunksize'.  Returns NULL with a Python exception set on
 * allocation failure.
 * NOTE(review): assumes PyLRengineError resolves to a defined
 * exception object -- this file's declaration is PyLRParseError.
 */
static stacktype * init_stack (stackchunksize)
int stackchunksize;
{
    stacktype * newstack;

    if (( newstack = (stacktype *) malloc(sizeof(stacktype))) == NULL) {
        PyErr_SetString(PyLRengineError, "Memory Error");
        return NULL;
    }
    newstack->si = 0;
    newstack->data = NULL;
    newstack->chunksize = stackchunksize;
    return newstack;
}
/*
 * Allocate one stack chunk with 'stackchunksize' empty entries, each
 * pre-set to {state = -1, tok = -1, pylrval = NULL}.  Returns NULL
 * with a Python exception set on allocation failure.
 */
static struct stackdata * init_stackdata (stackchunksize)
int stackchunksize;
{
    struct stackdata * newstackdata;
    int i;

    if ((newstackdata = (struct stackdata *) malloc (sizeof (struct stackdata))) == NULL) {
        PyErr_SetString(PyLRengineError, "Memory Error");
        return NULL;
    }
    if ((newstackdata->bucket = (struct stackdatum **) malloc (sizeof (struct stackdatum *) * stackchunksize)) == NULL) {
        PyErr_SetString(PyLRengineError, "Memory Error");
        return NULL;
    }
    for (i=0; i < stackchunksize; i++) {
        if ((newstackdata->bucket[i] = (struct stackdatum *) malloc(sizeof (struct stackdatum))) == NULL) {
            onError("Memory Error");
        }
        newstackdata->bucket[i]->state = -1;
        newstackdata->bucket[i]->tok = -1;
        newstackdata->bucket[i]->pylrval = NULL;
    }
    newstackdata->next = NULL;
    return newstackdata;
}
/*
 * Push one (token, state, value) entry onto the stack.  When si is a
 * multiple of chunksize the current chunk is full, so a fresh chunk is
 * linked in at the front and the entry goes into its slot 0; otherwise
 * it goes into the next free slot of the front chunk.  The stack takes
 * its own reference to pylrval (Py_XINCREF).
 */
static void push (stack, token, state, pylrval)
stacktype * stack;
int token;
int state;
PyObject * pylrval;
{
    struct stackdata *newstackdata;

    if (! (stack->si % stack->chunksize)) {
        newstackdata = init_stackdata(stack->chunksize);
        newstackdata->bucket[0]->tok = token;
        newstackdata->bucket[0]->state = state;
        newstackdata->bucket[0]->pylrval = pylrval;
        newstackdata->next = stack->data;
        stack->data = newstackdata;
    } else {
        stack->data->bucket[stack->si % stack->chunksize]->tok = token;
        stack->data->bucket[stack->si % stack->chunksize]->state = state;
        stack->data->bucket[stack->si % stack->chunksize]->pylrval = pylrval;
    }
    Py_XINCREF(pylrval);
    stack->si++;
}
/*
 * Debug helper: print every chunk and slot of the parse stack to
 * stdout.  Temporarily walks stack->data and restores it afterwards.
 */
static void show_stack(stack)
struct stack_struct * stack;
{
    struct stackdata * orgstackdata;
    int i;

    orgstackdata = stack->data;
    printf("stack at %p:\n", stack);
    for (stack->data; stack->data != NULL; stack->data = stack->data->next) {
        printf("stack->data at %p\n", stack->data);
        for (i=0; i<stack->chunksize; i++) {
            printf ("stack->data->bucket[%d] = (%d, %d, %p)\n",
                    i,
                    stack->data->bucket[i]->tok,
                    stack->data->bucket[i]->state,
                    stack->data->bucket[i]->pylrval);
        }
    }
    stack->data = orgstackdata;
}
/***********************************************************************
* This function returns the python objects stored on the stack so that
* they can then be passed to the appropriate function (popping the stack
* only occurs when a reduce operation is called, so the python objects
* returned get passed to the function associated with the production that
* is associated with popping items from the stack. see the method parser_parse
* for how this works in more detail
***********************************************************************/
/*
 * Pop 'amt' semantic values off the stack and return them in a freshly
 * malloc'd array, top of stack first.  The caller owns the array and
 * the contained references.  Returns NULL when amt == 0, or NULL with
 * a Python exception set on error.  Fully drained chunks are unlinked
 * and freed as the stack shrinks.
 */
static PyObject ** pop(stack, amt)
stacktype * stack;
int amt;
{
    struct stackdata * tmpsd;
    PyObject ** popped_pylrvals;
    int c = 0;

    if (amt == 0)
        return NULL;
    if ((popped_pylrvals = (PyObject **)malloc(sizeof(PyObject *) * amt)) == NULL)
        onError("Memory Error");
    if (stack->si < amt) {
        PyErr_SetString(PyLRengineError, "popping too much from stack!!!");
        free(popped_pylrvals);  /* BUGFIX: was leaked on this path */
        return 0;
    }
    /* BUGFIX: the original malloc'd a dummy PyObject into
     * popped_pylrvals[c] on every iteration and immediately overwrote
     * the pointer, leaking one allocation per popped entry. */
    while (amt > 0 && stack->si >= 0) {
        if ((stack->si - 1) % stack->chunksize) {
            /* top entry lives mid-chunk: blank the slot, take the value */
            stack->data->bucket[(stack->si -1) % stack->chunksize]->tok = -1;
            stack->data->bucket[(stack->si -1) % stack->chunksize]->state = -1;
            popped_pylrvals[c] = stack->data->bucket[(stack->si -1) % stack->chunksize]->pylrval;
            stack->data->bucket[(stack->si -1) % stack->chunksize]->pylrval = NULL;
        } else {
            /* top entry is slot 0: after taking it this chunk is empty,
             * so unlink and free it */
            stack->data->bucket[0]->tok = -1;
            stack->data->bucket[0]->state = -1;
            popped_pylrvals[c] = stack->data->bucket[0]->pylrval;
            stack->data->bucket[0]->pylrval = NULL;
            tmpsd = stack->data->next;
            free(stack->data);
            stack->data = tmpsd;
        }
        amt--; stack->si--; c++; /* not quite ;) */
    }
    return popped_pylrvals;
}
/* State on top of the stack; an empty stack is in the start state 0. */
#define stackstate(stack) \
(((stack)->data == NULL)?\
0:\
(stack)->data->bucket[((stack)->si - 1) % (stack)->chunksize]->state)
/***********************************************************************
* Production Info related functions
***********************************************************************/
/*
 * Convert the Python production-info list of (len, callable, lhsi)
 * tuples into a malloc'd C table, recording the table size on the
 * parser object.  Returns NULL with a Python exception set on error.
 * NOTE(review): allocations made before an error are not released.
 */
static prodinfo_type ** Py_prodinfo2prodinfo (parserobj, py_prodinfo)
parserobject * parserobj;
PyObject * py_prodinfo;
{
    prodinfo_type ** prodinfo;
    PyObject * prodtuple;
    int listsize;
    register int listi;

    listsize = PyList_Size(py_prodinfo);
    if (listsize == -1)
        onError("production info table is not a list!");
    parserobj->prodinfosize = listsize;
    if ((prodinfo = (prodinfo_type **) malloc (sizeof (prodinfo_type *) * listsize)) == NULL)
        onError("No more Mem!");
    for (listi=0; listi < listsize; listi++) {
        if ((prodinfo[listi] = (prodinfo_type *) malloc (sizeof(prodinfo_type))) == NULL)
            onError("Memory");
        prodtuple = PyList_GetItem(py_prodinfo, listi);
        if (! PyTuple_Check(prodtuple))
            onError("Corrupt Prodinfo table, must contain tuples of (len, callable)");
        prodinfo[listi]->len = (short int) PyInt_AsLong(PyTuple_GetItem(prodtuple, 0));
        /* BUGFIX: the original malloc'd a dummy PyObject for ->func and
         * immediately overwrote the pointer with the borrowed tuple
         * item, leaking one allocation per production. */
        prodinfo[listi]->func = PyTuple_GetItem(prodtuple, 1);
        prodinfo[listi]->lhsi = (int) PyInt_AsLong(PyTuple_GetItem(prodtuple, 2));
        if ((! PyCallable_Check(prodinfo[listi]->func)) && (prodinfo[listi]->func != Py_None))
            onError("corrupt prodinfo data, must contain tuples of (len, callable)");
        Py_XINCREF(prodinfo[listi]->func);  /* the C table keeps its own reference */
    }
    return prodinfo;
}
/*
 * Build a fresh Python list of (len, func, lhsi) tuples mirroring the
 * C production-info table (backs the 'prodinfo' attribute).
 */
static PyObject * prodinfo2Py_prodinfo(prodinfo, sz)
prodinfo_type ** prodinfo;
int sz;
{
    int i;
    PyObject * list;
    PyObject * tuple;
    PyObject * len;
    PyObject * func;
    PyObject * lhsi;

    list = PyList_New(sz);
    for (i=0; i<sz; i++) {
        tuple = PyTuple_New(3);
        len = Py_BuildValue("i", prodinfo[i]->len);
        lhsi = Py_BuildValue("i", prodinfo[i]->lhsi);
        func = prodinfo[i]->func;
        /* BUGFIX: PyTuple_SetItem steals a reference; without this
         * INCREF every call silently dropped the C table's own
         * reference to the callback. */
        Py_XINCREF(func);
        PyTuple_SetItem(tuple, 0, len);
        PyTuple_SetItem(tuple, 1, func);
        PyTuple_SetItem(tuple, 2, lhsi);
        PyList_SetItem(list, i, tuple);
    }
    return list;
}
/***********************************************************************
* the goto table, show and set routines
***********************************************************************/
#define GOTOERR -1
/*
 * Convert the Python goto table (list of lists of int-or-None) into a
 * malloc'd int matrix on the parser; None becomes GOTOERR.  Also
 * records the matrix dimensions (goto_x rows, goto_y columns).
 * Returns the matrix, or NULL with a Python exception set on error.
 */
static void * mkgototable(parser, pygotos)
parserobject * parser;
PyObject * pygotos;
{
    register int outerlen;
    register int outerct;
    register int innerlen;
    register int innerct;
    int ** gotos;
    PyObject * innerlist;
    PyObject * py_entry;

    outerlen = PyList_Size(pygotos);
    parser->goto_x = 0;
    parser->goto_y = 0;
    parser->gototable = NULL;
    if (outerlen == -1)
        onError("goto table must be a list of lists!");
    if ((gotos = (int **) malloc(sizeof(int *) * outerlen)) == NULL)
        onError("Memory Error");
    for (outerct = 0; outerct < outerlen; outerct++) {
        innerlist = PyList_GetItem(pygotos, outerct);
        innerlen = PyList_Size(innerlist);
        if (innerlen == -1)
            onError ("goto table must be a list of lists!");
        if ((gotos[outerct] = (int *) malloc (sizeof(int) * innerlen)) == NULL)
            onError("Memory Error");
        for (innerct = 0; innerct < innerlen; innerct++) {
            py_entry = PyList_GetItem(innerlist, innerct);
            if ((! PyInt_Check( py_entry)) && (py_entry != Py_None))
                onError("goto table must be a list of list of either ints or None!");
            if (py_entry == Py_None) {
                gotos[outerct][innerct] = GOTOERR;
            }
            else {
                gotos[outerct][innerct] = (int) PyInt_AsLong(py_entry);
            }
        }
    }
    parser->goto_x = outerlen;
    parser->goto_y = innerlen;
    parser->gototable = gotos;
    /* BUGFIX: the function is declared to return a pointer but used to
     * fall off the end; return the table so the value is defined. */
    return gotos;
}
/* Debug method: print the goto table, one row (state) per line. */
static PyObject * show_gotos(self, args)
parserobject * self;
PyObject * args;
{
    register int row;
    register int col;

    for (row = 0; row < self->goto_x; row++) {
        for (col = 0; col < self->goto_y; col++)
            printf("%d ", self->gototable[row][col]);
        printf ("\n");
    }
    Py_INCREF(Py_None);
    return Py_None;
}
/***********************************************************************
* Action Table set and show
***********************************************************************/
#define ACTERR -1
/*
 * Convert the Python action table (list of lists of (act, arg) tuples,
 * where act is a one-character string) into a malloc'd matrix of
 * actiontype cells on the parser, and record its dimensions.
 * Returns the matrix, or NULL with a Python exception set on error.
 * BUGFIX: the error messages used to say "goto table" (copy-paste from
 * mkgototable) even though this builds the action table.
 */
static void * mkactiontable(parser, pyactions)
parserobject * parser; PyObject * pyactions;
{
    register int outerlen;
    register int outerct;
    register int innerlen;
    register int innerct;
    actiontype *** actions;
    PyObject * innerlist;
    PyObject * py_tuple;
    PyObject * py_act;
    char * cact;
    PyObject * py_arg;
    int tuplelen;

    parser->act_x = 0;
    parser->act_y = 0;
    parser->actiontable = NULL;
    outerlen = PyList_Size(pyactions);
    if (outerlen == -1)
        onError("action table must be a list of lists!");
    if ((actions = (actiontype ***) malloc(sizeof(actiontype *) * outerlen)) == NULL)
        onError("Memory Error");
    for (outerct = 0; outerct < outerlen; outerct++) {
        innerlist = PyList_GetItem(pyactions, outerct);
        innerlen = PyList_Size(innerlist);
        if (innerlen == -1)
            onError ("action table must be a list of lists!");
        if ((actions[outerct] = (actiontype **) malloc (sizeof(actiontype *) * innerlen)) == NULL)
            onError("Memory Error");
        for (innerct = 0; innerct < innerlen; innerct++) {
            if ((actions[outerct][innerct] = (actiontype *) malloc(sizeof(actiontype))) == NULL)
                onError("Memory Error");
            py_tuple = PyList_GetItem(innerlist, innerct);
            if (! PyTuple_Check(py_tuple))
                onError("action table must be a list of list of tuples!");
            tuplelen = PyTuple_Size(py_tuple);
            if (tuplelen != 2)
                onError("action table must contain entries of tuples of length 2");
            py_act = PyTuple_GetItem(py_tuple, 0);
            py_arg = PyTuple_GetItem(py_tuple, 1);
            if ((! PyString_Check(py_act)) || (! PyInt_Check(py_arg)))
                onError("action table's entries must be tuples of type string, int");
            /* first character of the string is the action code */
            actions[outerct][innerct]->act = (short) *(PyString_AsString(py_act));
            actions[outerct][innerct]->arg = (int) PyInt_AsLong(py_arg);
        }
    }
    parser->act_x = outerlen;
    parser->act_y = innerlen;
    parser->actiontable = actions;
    /* BUGFIX: declared to return a pointer but used to fall off the end */
    return actions;
}
/* Debug method: print the action table as (act, arg) pairs per state. */
static PyObject * show_actions(self, args)
parserobject * self;
PyObject * args;
{
    register int row;
    register int col;

    for (row = 0; row < self->act_x; row++) {
        for (col = 0; col < self->act_y; col++)
            printf("(%c, %d), ", self->actiontable[row][col]->act, self->actiontable[row][col]->arg);
        printf ("\n");
    }
    Py_INCREF(Py_None);
    return Py_None;
}
/***********************************************************************
* Parser Type Info and internal routines
***********************************************************************/
staticforward PyTypeObject ParserType;
#define is_parserobject(v) ((v)->ob_type == &ParserType)
/***********************************************************************
* Parser Methods
***********************************************************************/
/*
 * Parser method .parse(tok, val): feed one token and run one
 * shift/reduce step against the buffered lookahead.  Returns 1 while
 * more input is wanted, 0 once the accept action fires, or NULL with
 * an exception set on error.  The first call only buffers its token
 * (toksadded warm-up) so the engine always has one token of lookahead.
 */
static PyObject *
parser_parse(self, args)
parserobject * self;
PyObject * args;
{
    int tok, curstate, i, tuple_i;
    PyObject * pylrval;
    PyObject * fargs;
    PyObject * fres;
    actiontype * act;
    PyObject ** pylrvals;

    if (! PyArg_ParseTuple(args, "iO", &tok, &pylrval)) {
        return NULL;
    }
    /* the input buffer owns a reference to the semantic value */
    Py_XINCREF(pylrval);
    add2buf(self->inbuf, tok, pylrval);
    if ( self->toksadded < 1) {
        self->toksadded++;
        return Py_BuildValue("i", 1);
    }
    if ((stackstate(self->stack) < 0) || (gettoken(self->inbuf) < 0))
        onError("PyLRTableIndexError");
    act = self->actiontable[stackstate(self->stack)][gettoken(self->inbuf)];
    if (act == NULL) {
        onError("PyLRTableError, couldn't retrieve action");
    }
    if (act->act == SHIFT) {
        /* move the lookahead token onto the stack and consume it */
        push(self->stack, gettoken(self->inbuf), act->arg, getpylrval(self->inbuf));
        incbi(self->inbuf);
        return Py_BuildValue("i", 1);
    } else if (act->act == REDUCE) {
        /* pop the rhs values, call the production's Python handler with
         * them in grammar order, then push the goto state with the
         * handler's result as the new semantic value.  Production
         * numbers in the table are 1-based, hence act->arg - 1. */
        pylrvals = pop(self->stack, self->prodinfo[act->arg - 1]->len);
        if (PyErr_Occurred()) { return NULL; }
        curstate = stackstate(self->stack);
        fargs = PyTuple_New(self->prodinfo[act->arg - 1]->len);
        for (i=0; i < self->prodinfo[act->arg - 1]->len ; i++) {
            /* pop() yields values top-first; reverse them into the tuple */
            tuple_i = ((self->prodinfo[act->arg -1]->len - i) -1);
            PyTuple_SetItem(fargs, tuple_i, pylrvals[i]);
        }
        fres = PyObject_CallObject(self->prodinfo[act->arg - 1]->func, fargs);
        if (PyErr_Occurred())
            return NULL;
        Py_XINCREF(fres);
        /* Py_DECREF(fargs);*/
        push(self->stack, act->arg, self->gototable[curstate][self->prodinfo[act->arg - 1]->lhsi], fres);
        return Py_BuildValue("i", 1);
    } else if (act->act == ACCEPT) {
        return Py_BuildValue("i", 0);
    } else {
        PyErr_SetString(PyLRengineError, "SyntaxError while parsing");
        return NULL;
    }
}
/* Python method .showstack(): dump the LR stack to stdout. */
static PyObject *
parser_show_stack(obj, noargs)
parserobject * obj;
PyObject * noargs;
{
    if (!PyArg_ParseTuple(noargs, ""))
        return NULL;
    show_stack(obj->stack);
    Py_XINCREF(Py_None);
    return Py_None;
}
/* Python method .showbuf(): dump the input buffer to stdout. */
static PyObject *
parser_show_inbuf(obj, noargs)
parserobject * obj;
PyObject * noargs;
{
    if (!PyArg_ParseTuple(noargs, ""))
        return NULL;
    dumpinbuf(obj->inbuf);
    Py_XINCREF(Py_None);
    return Py_None;
}
static struct PyMethodDef Parser_methods[] = {
{ "parse", parser_parse, 1},
{ "showstack", parser_show_stack, 1},
{ "showbuf", parser_show_inbuf, 1},
{ "showgotos", show_gotos, 1},
{ "showacts", show_actions, 1},
{ NULL, NULL}, /* sentinel */
};
/***********************************************************************
* Basic type operations for ParserType
***********************************************************************/
/*
 * Build a parserobject: allocate it, create the empty stack and input
 * buffer, convert the three Python tables into their C forms, and zero
 * the lookahead counter.  Returns NULL if any step set an exception.
 * NOTE(review): on failure the partially built object and its
 * allocations are not released.
 */
static parserobject *
newparserobject (pyprodinfo, pyactions, pygotos, bufchunksize, stackchunksize)
PyObject * pyprodinfo;
PyObject * pyactions;
PyObject * pygotos;
int bufchunksize;
int stackchunksize;
{
    parserobject *p;

    p = PyObject_NEW(parserobject, &ParserType);
    if (p == NULL)
        onError("memory in init obj...");
    p->stack = init_stack(stackchunksize);
    p->inbuf = init_inbuf(bufchunksize);
    mkgototable(p, pygotos);
    mkactiontable(p, pyactions);
    p->prodinfo = Py_prodinfo2prodinfo(p, pyprodinfo);
    p->toksadded = 0;
    if (PyErr_Occurred())
        return NULL;
    return p;
}
/*
 * Instance destructor.
 * NOTE(review): only the object struct itself is freed; the stack,
 * input buffer, C tables and the Python references they hold are never
 * released.
 */
static void
parser_dealloc(self)
parserobject *self;
{
    PyMem_DEL(self);
}
/* tp_print slot: write a short placeholder representation to fp. */
static int
parser_print(obj, fp, flags)
parserobject * obj;
FILE * fp;
int flags;
{
    fprintf(fp, "<PyLRengine Object at %p>\n", obj);
    return 0;
}
/*
 * Attribute hook: exposes read-only attributes 'state' (current LR
 * state), 'stacksize' (entries on the stack) and 'prodinfo' (the
 * production table as a Python list), plus '__members__' for dir();
 * anything else is looked up in the instance method table.
 */
static PyObject *
parser_getattr(self, name)
parserobject * self;
char * name;
{
    if (strcmp(name, "state") == 0)
        return Py_BuildValue("i", stackstate(self->stack));
    if (strcmp(name, "stacksize") == 0)
        return Py_BuildValue("i", (self->stack->si));
    if (strcmp(name, "prodinfo") == 0)
        return prodinfo2Py_prodinfo(self->prodinfo, self->prodinfosize);
    if (strcmp(name, "__members__") == 0)
        return Py_BuildValue("[sss]", "state", "stacksize", "prodinfo");
    else
        return Py_FindMethod(Parser_methods, (PyObject *) self, name);
}
/* Type object for the engine; only dealloc, print and getattr slots
 * are wired up. */
static PyTypeObject ParserType = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,
    "NewEngine", /* type name */
    sizeof(parserobject), /* basic size */
    0, /* itemsize */
    (destructor) parser_dealloc,
    (printfunc) parser_print,
    (getattrfunc) parser_getattr
};
/***********************************************************************
* Module Logic
***********************************************************************/
static PyObject *
parsernew(self, args)
PyObject* self;
PyObject* args;
{
PyObject* pyprodlengths = NULL;
PyObject* pyactions = NULL;
PyObject* pygotos = NULL;
PyObject* res = NULL;
int bufchunksize=50;
int stackchunksize=100;
CHECK_MALLOC(pyprodlengths)
CHECK_MALLOC(pyactions)
CHECK_MALLOC(pygotos)
if (!PyArg_ParseTuple(args, "O!O!O!|ii", &PyList_Type, &pyprodlengths,
&PyList_Type, &pyactions, &PyList_Type, &pygotos,
&bufchunksize, &stackchunksize))
goto finally;
res = (PyObject*) newparserobject(pyprodlengths, pyactions, pygotos, bufchunksize, stackchunksize);
finally:
Py_XDECREF(pyprodlengths);
Py_XDECREF(pyactions);
Py_XDECREF(pygotos);
return res;
}
/* Module-level function table.
 * NOTE(review): no ml_flags field is given here, while the instance
 * method table passes 1 (METH_VARARGS); since parsernew uses
 * PyArg_ParseTuple, confirm this entry should pass 1 as well. */
static struct PyMethodDef PyLRengine_methods[] = {
    {"NewEngine", (PyCFunction)parsernew},
    {NULL, NULL}
};
void
initPyLRengine()
{
PyObject *m, *d;
m = Py_InitModule("PyLRengine", PyLRengine_methods);
d = PyModule_GetDict(m);
if (PyErr_Occurred())
Py_FatalError("can't initialize module PyLRengine");
}

44
PyLR/README Normal file
View file

@ -0,0 +1,44 @@
You must have python 1.5b1 or newer to run PyLR, as it works with the
builtin package support of that version.
To build:
1) decide whether you want the PyLRengine module to be a shared library.
If not, comment out the '*shared*' line in Setup.
2) type: make -f Makefile boot; make
that should build the package.
To install:
If you want to install PyLR in your python distribution, just copy
over the PyLR directory to your site-packages directory. If you want
to save a little space, take a look at the __init__ file doc string in
the top directory and it shows the necessary files (distribution minus
Makefile, sedscript, etc.). Also, there is a script (pgen.py) that you may want
to put in /usr/local/bin, or somewhere else that is on your shell's path, so it can be run directly.
There is html documentation in the doc/ directory.
To test:
pgen.py PyLR/tstpspec tst
diff tst PyLR/Parsers/gram.py
the only difference should be the date line.
Feedback:
send comments/suggestions/bugreports/contributions to
scott@chronis.icgroup.com
thanks,
scott

2
PyLR/Setup Normal file
View file

@ -0,0 +1,2 @@
*shared*
PyLRengine PyLRenginemodule.c

2
PyLR/Setup.in Normal file
View file

@ -0,0 +1,2 @@
*shared*
PyLRengine PyLRenginemodule.c

39
PyLR/__init__.py Normal file
View file

@ -0,0 +1,39 @@
"""
This package has the following modules and characteristics:
(-) = not done yet
(*) = done
(?) = working on it
PyLR/ the top level module Language Generation Tools
__init__.py(*) this file
Lexer.py(*) defines the Lexer interface that the parser will use, uses re
Lexers/(?) a package to put lexers for different things
__init__ imports GrammarLex class
GrammarLex.py The module that defines the lexer for grammar specifications
Grammar.py(*) The module for dealing with grammars
PyLRenginemodule.so(*) The engine behind a LR parser (can do SLR, LR, and LALR)
Parser.py (*) A class interface to a parser
Parsers/(?) A package for storing Parsers
__init__ imports GrammarParser class
gram.py(*) the definition of the GrammarParser (import into Parsers/ namespace)
pgen.py(*) a script for parser generation
parsertemplate.py the doc string of this module is the template for parser generation
"""
import Parser,Lexers,Parsers
from Lexer import Lexer,SKIPTOK
__version__ = "$Id$"

75
PyLR/config.c Normal file
View file

@ -0,0 +1,75 @@
/* Generated automatically from /usr/lib/python1.5/config/config.c.in by makesetup. */
/* -*- C -*- ***********************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.
All Rights Reserved
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.
While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.
STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
******************************************************************/
/* Module configuration */
/* !!! !!! !!! This file is edited by the makesetup script !!! !!! !!! */
/* This file contains the table of built-in modules.
See init_builtin() in import.c. */
#include "Python.h"
extern void initregex();
extern void initpcre();
extern void initposix();
extern void initsignal();
/* -- ADDMODULE MARKER 1 -- */
extern void PyMarshal_Init();
extern void initimp();
/* Table mapping built-in module names to their init functions.
   NOTE(review): this file is emitted by makesetup -- change the Setup
   file and regenerate rather than editing this table by hand. */
struct _inittab _PyImport_Inittab[] = {
{"regex", initregex},
{"pcre", initpcre},
{"posix", initposix},
{"signal", initsignal},
/* -- ADDMODULE MARKER 2 -- */
/* This module "lives in" with marshal.c */
{"marshal", PyMarshal_Init},
/* This lives with import.c */
{"imp", initimp},
/* These entries are here for sys.builtin_module_names */
{"__main__", NULL},
{"__builtin__", NULL},
{"sys", NULL},
/* Sentinel */
{0, 0}
};

142
PyLR/doc/PyLR.html Normal file
View file

@ -0,0 +1,142 @@
<html>
<body bgcolor="#ffffff">
<title> PyLR -- Fast LR parsing in python </title>
<!-- Changed by: Scott, 15-Dec-1997 -->
<center>
<h2>PyLR -- Fast LR parsing in python</h2>
<hr>
</center>
<ul>
<li> <a href="#whatis"> What is PyLR? </a>
<li> <a href="#status"> What is the current state of PyLR? </a>
<li> <a href="#where"> Where do I get PyLR? </a>
<li> <a href="#directions"> What will be added to PyLR? </a>
<li> <a href="#parsing"> Where do I find out about parsing theory? </a>
<li> <a href="#contrib"> How can I contribute to PyLR? </a>
</ul>
<hr>
<p><p>
<a name="whatis"><h2>What is PyLR?</h2></a>
PyLR is a package of tools for creating efficient parsers in python,
commonly known as a compiler compiler. PyLR is currently under
development. A full release is almost complete, but there are still a few missing
features that would make it much nicer.
<p>
PyLR (pronounced 'pillar') was motivated by the frequency with which parsers are hand
coded in python, the performance demands that these parsers are subject to (you just can't beat
native machine code for speed...), and academic curiosity (I wanted to really know how LR
parsing works).
<p><p>
<a name="status"> <h2>What is the current state of PyLR? </h2></a>
PyLR currently has class interfaces to a Grammar, a Lexer, an extension module
defining a parsing engine builtin type, and a parser generator script. All of these components
are based on sound parsing theory, but nevertheless haven't been tested by anyone but its author.
The code as is stands can definitely be of use to anyone hand writing a parser in python, but some
of the nicer things in the complete package <em> just haven't been done yet </em>. <p>
PyLR is therefore under development, as it will always be. PyLR will be given a release number
once it supplies the following tools:
<ul>
<LI> write an 'engine' module that implements the LR parsing
algorithm in C with callbacks to python functions. (done) </LI>
<LI> write a Lexer class using re (done)</LI>
<LI> write a Grammar class that will take as input a context
free grammar and produce the parsing tables necessary to complement
the engine. This is to be done with LR(1) grammars (done and then
deleted -- extremely inefficient) and LALR(1) Grammars(done,
except with epsilon (empty) productions,<EM> much</EM> more efficient). </LI>
<LI> add a user interface -- manually write a lexer and Grammar
using the existing classes to parse lexer and grammar specifications
modelled after lex/flex and yacc/bison. (done for Grammars)
</LI>
<LI> write documentation. (usable, but not done)
</LI>
<LI> (post release) add grammars to various languages to the
distribution.
</LI>
</ul>
In addition, I have the following plan for the project:
<UL>
<LI> make 'epsilon' (empty) productions work (many of them work now, but not all) </LI>
<LI> optimize the Lexer. Try to join it into one regular expression and derive
function calls from match object data. (done, still the slowest part of parsing)</LI>
<LI> add error specification routines. </LI>
<LI> change the parser generation algorithm to use only kernel LALR(1) items
in the computation of shift actions and gotos in the goto table. This
should significantly enhance the rate of parser generation, which is currently
a bit slow, but certainly acceptable for medium-sized grammars (&lt; ~100 productions)
(done!) this version
</LI>
<LI> write a Parser for sql, as used in <A HREF="http://www.pythonpros.com/arw/kwParsing/">gadfly</A>
</LI>
<LI> add operator precedence as an option to the parser specification (further down the road...)</LI>
</UL>
These things will probably be done over the next month or two (as I only have free time to give
to this project...Ahemmm...).
<p><p>
<a name="where"><h2>Where do I get PyLR? </h2></a>
You can get PyLR in one of two places, <a href="ftp://chronis.icgroup.com/pub/">here</a>
or <a href="PyLR.tgz"> here</a>. Both versions will be in sync with each other.
<p><p>
<a name="directions"><h2>What will be added to PyLR? </h2></a>
In addition to the <a href ="#status">list of things to finish </a> before a full release
is published, PyLR could be used as the basis for an efficient datapath analyzer (optimizer),
for a front end to translation from one language to another, for type checking code, etc.<p>
As soon as the first release is completed, Tools to aid in all these things could well be added
to the package. Also, anyone wanting to contribute parser specifications for
languages of general use is most welcome.
<p><p>
<a name="parsing"> <h2>Where do I find out more about parsing? </h2></a>
Parsing was for a long time a big challenge for computer scientists. The need for
computer parsing originally came about with the first writing of compilers. Since then, the
theory behind parsing has been studied in depth and has pretty much stabilized as it no longer
really presents a big problem in terms of speed or size in terms of parsing todays computer
languages. One standard means of parsing that has been used for years because of its efficiency
is LR parsing (more particularly, LALR parsing). A lot of good information is in
<a href="http://www.amazon.com/exec/obidos/ISBN=1565920007">
Lex and Yacc</a> ,
<a href="http://www.amazon.com/exec/obidos/ISBN=0201100886">
The Dragon Book </a>, and
it seems like the only place to find good info on LALR parsing is in
<pre>
DeRemer, F.; and Pennello, T.Efficient computation of LALR(1) look-ahead sets, ACM Trans.
Program. Lang. Syst. 4 (1982), 615-649.
</pre>
Finally, to find out how to use PyLR, see the<A HREF="manual.html">PyLR manual</A>
<a name="contrib"> <h2>How do I contribute to PyLR? </h2></a>
<a href="mailto:scott@chronis.icgroup.com">mail me. </a>

313
PyLR/doc/manual.html Normal file
View file

@ -0,0 +1,313 @@
<html>
<title> PyLR manual </title>
<!-- Changed by: Scott, 12-Dec-1997 -->
<body bgcolor="#ffffff">
<CENTER>
<h2>PyLR Manual</h2>
</CENTER>
This is the PyLR parser generator manual. PyLR is a parser generator package for
use with python (version 1.5b1 or better). This manual addresses how to use the
package to produce parsers.
<p>
<UL>
<LI> <A HREF="#Audience">Intended Audience</A> </LI>
<LI> <A HREF="#Basics">The Basics</A> </LI>
<UL>
<LI> <A HREF="#Lexer">Writing a Lexer</A> </LI>
<LI> <A HREF="#Grammar">Writing a Grammar</A> </LI>
<LI> <A HREF="#Parser">Putting it together, producing the parser</A> </LI>
</UL>
<LI> <A HREF="#Struct">PyLR Structure Overview</A> </LI>
<LI> <A HREF="#API">Programming with the Classes</A> </LI>
</UL>
<HR>
<p>
<p>
<A NAME="Audience"> <center> <h3> Audience </h3></center></A>
Parsing can be very complicated stuff, and it helps to understand what exactly is
happening when something is parsed when writing a parser. Unfortunately (for the impatient),
the topic of Parsing has been the subject of many a dissertation. This document will present
two views on the data it presents. One is a technical view which will contain terms <EM>without
defining them</EM>. These terms are generally understood by those who have studied parsing theory
(such as LALR, shift-reduce, etc), and probably not understood by those that haven't. For this
reason, I have attempted to include an intuitive view whenever possible, particularly in the
section <A HREF="#Basics">The Basics</A>. There should be enough in that section to let anyone
who is interested and familiar with python write a parser.
<HR><p>
<A NAME="Basics"><center><h3>The Basics</h3></center></A> <br>
This section refers to writing lexers, Grammars, and then producing a parser with
these parts. In PyLR, a lexer is part of a parser. This simplifies the interface to
actually doing the parsing. There is an 'engine' which takes the output of the lexer and triggers
the back end of parsing. So we'll start with writing a lexer.
<UL>
<LI>
<A NAME="Lexer"><h4>Writing a Lexer</h4></A><br>
When some text is to be parsed, it first has to go through lexical analysis. This
process is done with a lexer. PyLR provides a base Lexer class to help write a lexer.
The process isn't hard. A lexer just returns the atomic parts of the text. You define what is
atomic by using regular expressions to match the atomic parts. Each atomic definition
you give is automatically given a token value (an int). When the lexer scans text, it returns
a stream of <TT>(token, value)</TT> pairs where the the token is the token value that was assigned
to the match definition and the the value is an arbitrary python value (class, string, int, whatever).
The <TT>(token, value)</TT> pair is then passed to the parser for further processing.
<p>
Frequently, lexers will return the matched text as the
<TT>value</TT> in the <TT>(token, value)</TT> pair. This is the
default when you subclass the provided Lexer class. However, there
are a lot of different things you may want to happen upon finding a
match. For example, sometimes you will want to match something but
not use the match or pass it on to the parser.
<p>
There is a function in the base class that
provides for all these and more options. It is the <br>
<TT>.addmatch(compiled_regex, tokenname="", function=None,
flags=MAPTOK|EXECFN)</TT> <br> method. This method requires only a regular
expression as its argument, but in practice, token names should be passed along with
the re. This practice will make your grammar more readable and easier
to write later. <br> The <TT>function</TT> argument, if specified, will make the
lexer execute that function with the resulting match object as it's
one and only argument. The lexer will then return the return value of
the function as the <TT>value</TT> in the <TT>(token, value)</TT> pair
it returns. By default, the lexer will just return the token and the associated
matched text.
<br>
The <TT>flags</TT> argument not only defaults to the reasonable MAPTOK|EXECFN, but also adopts to
the values of the other arguments you pass. This way, you don't have to bother with them much. The one
time it's common to use the flags is when you want the lexer to match something but not return anything until
the next match. It is common to have whitespace treated in this fashion. For this option, you use
<TT>.addmatch(re.compile(r"\s+"), "", None, Lexer.SKIPTOK)</TT>. The example below utilizes all these
options.
<p>
Finally, please note the call of the <TT>.seteof()</TT> function at the end of the <TT>__init__</TT>
method. This is necessary for all subclassed lexers. The reason it is there is that the token value
of EOF is expected to be one greater than any other token value by the parser. <EM>Your lexer will not
work with the parser api without this call.</EM>
<p>
Example
<pre>
from PyLR import Lexer
import re, string
#
# this function will handle matches to an integer. It passes the
# integer value to the parser and does the conversion here.
#
def intfunc(m):
return string.atoi(m.group(0))
class mathlex(Lexer.Lexer):
#
# define the atomic parts with regular expressions
#
INT = re.compile(r"([1-9]([0-9]+)?)|0") # matches an integer
LPAREN = re.compile(r"\(") # matches '('
RPAREN = re.compile(r"\)") # matches ')'
TIMES = re.compile(r"\*") # matches '*'
PLUS = re.compile(r"\+") # matches '+'
WS = re.compile(r"\s+") # matches whitespace
def __init__(self):
#
# initialize with the base class
#
Lexer.Lexer.__init__(self)
#
# addmatch examples
#
self.addmatch(self.INT, intfunc, "INT")
for p,t in ( (self.PLUS, "PLUS"), (self.TIMES,"TIMES"),
(self.LPAREN, "LPAREN"), (self.RPAREN, "RPAREN"),):
self.addmatch(p, None, t)
self.addmatch(self.WS, None, "", Lexer.SKIPTOK)
self.seteof()
# create the lexer
lexer = mathlex()
# test it with the interactivetest method
lexer.interactivetest()
</pre>
</LI>
<hr>
<LI> <A NAME="Grammar"><h4>Writing a Grammar</h4></A><br>
The grammar you write is somewhat easier than the lexer. You don't have
to code anything. There is a program in the PyLR distribution called <TT>pgen.py</TT>
that will take your Grammar specification and produce a parser for you. The parser that is
produced is of the shift-reduce variety of LR parsers and uses LALR(1) items to help produce
the parsing tables. In other words, it uses a parsing algorithm that is quite efficient (implemented
in C) and will handle most modern day programming language constructs without a problem. These
qualities have made this parsing algorithm a very commonly used one in compiler construction since
October 1982.
<p>
When you write a grammar, you are specifying a <EM>context free grammar</EM> in normal form,
with a few addons to help generate the parser in Python. In other words, you specify a series
of productions. For example, to specify a very simple math grammar that will work with the
above lexer, you may state something like this:
<pre>
expression: expression PLUS term
| term;
term: term TIMES factor
| factor;
factor: LPAREN expression RPAREN
| INT;
</pre>
The identifiers in all uppercase are conventionally <EM>terminal symbols</EM>.
These will be identified by the lexer and returned to the parser. The identifiers
in all lowercase are the <EM>nonterminal symbols</EM>. Each nonterminal must appear
on the left somewhere. The corresponding right side may have terminals or non terminals.
You may not have empty (epsilon) right hand sides (yet).
<p>
Whenever the parser recognizes a production, it will call a function. You may specify
the name of the method of the parser class to be invoked for a production by adding
a parenthesized name to the right of the production. The above grammar rewritten with
method name specifications looks like this (This part will become more clear after the next step,
stay with it!).
<pre>
expression: expression PLUS term (addfunc)
| term;
term: term TIMES factor (timesfunc)
| factor;
factor: LPAREN expression RPAREN (parenfunc)
| INT;
</pre>
</LI>
<LI> <A NAME="Parser"><h4>Putting it all together: making the parser</h4></A><br>
When you create a parser, you are creating a class that is intended to act like
a class in library code. That is, it will mostly be used by subclassing that class.
The parser you create will parse what it was intended to, but it won't do anything
with the parse tree unless you subclass it and define some special methods.
<p>
Those methods must have the name specified in the grammar you wrote. For example, if you
built a parser for the above grammar, in order for it to actually add things together,
you would have to subclass the class that was produced and then define the methods
<TT>addfunc</TT>, <TT>timesfunc</TT>, and <TT>parenfunc</TT>. When each of these methods is called
it will be passed the values on the right hand side of the corresponding production as arguments.
Those values are either the value returned by the lexer, if the symbol is terminal, or
a value returned by one of these special methods, if the symbol is a nonterminal.
<p>
In the above example, since the rest of the productions only have one item, it doesn't really matter
whether or not they have methods, the parser just calls a reasonable default.
<p>
As you can see, we've defined most of what is necessary for building a parser. But the above should tell
you that there are a few other things that you may want to define, like the name of the class that
is produced, or what lexer is used with the parser. Describing these things along with a grammar like
the example above is writing a parser specification for PyLR. A reasonable parser specification for the
example we've been following:
<pre>
_class SimpleMathParser
_lex mathlex.mathlex()
_code from PyLR.Lexers import mathlex
"""
expression: expression PLUS term (addfunc)
| term;
term: term TIMES factor (timesfunc)
| factor;
factor: LPAREN expression RPAREN (parenfunc)
| INT;
"""
</pre>
the <TT>_class </TT> keyword defines the name of the class that the parser will take
the <TT>_lex</TT> keyword defines the code used to intialize that parser's lexer
the <TT>_code</TT> keyword defines extra code at the top of the output file. Multiple
instances of this keyword will cause the extra source code (in python) to be accumulated.
the triple quotes delimit the grammar section.
<p><em>
Please note, the above syntax is subject to change as this is an alpha release and I feel
that it can be improved upon.</em>
<p>
now you can create a parser. Just use the <TT>pgen.py</TT> script and it will output
your source code:
<pre>
pgen.py mathparserspec tst.py
chronis 3:34am $ python
Python 1.5b1 (#1, Nov 27 1997, 19:51:47) [GCC 2.7.2] on linux2
Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
>>> import tst
>>> dir(tst)
['PyLR', 'SimpleMathParser', '__builtins__', '__doc__', '__file__', '__name__', '_actiontable', '_gototable', '_prodinfo', 'mathlex']
>>> print tst.SimpleMathParser.__doc__
this class was produced automatically by the PyLR parser generator.
It is meant to be subclassed to produce a parser for the grammar
expression -> expression PLUS term (addfunc)
| term; (unspecified)
term -> term TIMES factor (timesfunc)
| factor; (unspecified)
factor -> LPAREN expression RPAREN (parenfunc)
| INT; (unspecified)
While parsing input, if one of the above productions is recognized,
a method of your sub-class (whose name is indicated in parens to the
right) will be invoked. Names marked 'unspecified' will not be invoked.
usage:
class MySimpleMathParser(SimpleMathParser):
# ...define the methods for the productions...
p = MySimpleMathParser(); p.parse(text)
>>> class MP(tst.SimpleMathParser):
... def __init__(self):
... tst.SimpleMathParser.__init__(self)
... def addfunc(self, left, plus, right):
... print "%d + %d" % (left, right)
... return left + right
... def parenfunc(self, lp, expr, rp):
... print "handling parens"
... return expr
... def timesfunc(self, left, times, right):
... print "%d * %d" % (left, right)
... return left * right
...
>>> mp = MP()
>>> mp.parse("4 * (3 + 2 * 5)")
2 * 5
3 + 10
handling parens
4 * 13
</pre>
</LI>
</UL>
<A NAME="Struct"><center><h3>Structure</h3></center></A>
Nothing yet, sorry it's an alpha, read the source.
<A NAME="API"><center><h3>API</h3></center></A>
Nothing yet, sorry it's an alpha. Read the source.
</html>

170
PyLR/gramnew.py Normal file
View file

@ -0,0 +1,170 @@
"""
out -- created Sun Dec 14 21:41:11 1997
This file was automatically generated by the PyLR parser generator.
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
tables are used to give functionality to a parsing engine. It also defines
A Parser class called GrammarParser which will use this engine. It's Usage is
indicated in GrammarParser's doc-string.
"""
#
# this section contains source code added by the user
# plus 'import PyLR'
#
import PyLR.Lexers
import PyLR.Parser
import PyLR
#
# the action table ('s', 4) means shift to state 4,
# ('r', 4) means reduce by production number 4
# other entries are errors. each row represents a state
# and each column a terminal lookahead symbol (plus EOF)
# these symbols are ['LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', 'EOF']
#
_actiontable = [
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)],
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)],
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)],
[('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)],
[('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)],
[('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)],
[('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)],
[('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)]
]
#
# the goto table, each row represents a state
# and each column, the nonterminal that was on the lhs of the
# reduction
#
_gototable = [
[1, 2, 3, 9, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, 4, None, 8, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, 6, 14, None, None, None, None],
[None, None, None, None, None, 13, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, 17, 19, 22, 26],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, 21, 22, 26],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None]
]
#
# This is the prodinfo table. each row represents a production
# the entries are the length of the production, the name of a method
# in an instance of the GrammarParser class below that gets called
# when that production occurs, and the index of the lhs in the
# nonterminals (as in # the gototable)
#
_prodinfo = [
(1, 'unspecified', 0), # pspec -> ['gspec']
(2, 'unspecified', 0), # pspec -> ['pydefs', 'gspec']
(3, 'unspecified', 1), # gspec -> ['GDEL', 'lhsdeflist', 'GDEL']
(2, 'unspecified', 2), # pydefs -> ['pydefs', 'pydef']
(1, 'unspecified', 2), # pydefs -> ['pydef']
(1, 'lexdef', 3), # pydef -> ['LEX']
(1, 'addcode', 3), # pydef -> ['CODE']
(1, 'classname', 3), # pydef -> ['CLASS']
(2, 'unspecified', 4), # lhsdeflist -> ['lhsdeflist', 'lhsdef']
(1, 'unspecified', 4), # lhsdeflist -> ['lhsdef']
(4, 'lhsdef', 5), # lhsdef -> ['ID', 'COLON', 'rhslist', 'SCOLON']
(1, 'singletolist', 6), # rhslist -> ['rhs']
(3, 'rhslist_OR_rhs', 6), # rhslist -> ['rhslist', 'OR', 'rhs']
(1, 'rhs_idlist', 7), # rhs -> ['rhsidlist']
(4, 'rhs_idlist_func', 7), # rhs -> ['rhsidlist', 'LPAREN', 'ID', 'RPAREN']
(1, 'unspecified', 8), # rhsidlist -> ['idlist']
(0, 'rhseps', 8), # rhsidlist -> []
(2, 'idl_idlistID', 9), # idlist -> ['idlist', 'ID']
(1, 'idlistID', 9), # idlist -> ['ID']
]
class GrammarParser (PyLR.Parser.Parser):
    """
    this class was produced automatically by the PyLR parser generator.
    It is meant to be subclassed to produce a parser for the grammar
    pspec -> gspec (unspecified)
    | pydefs gspec; (unspecified)
    gspec -> GDEL lhsdeflist GDEL; (unspecified)
    pydefs -> pydefs pydef (unspecified)
    | pydef; (unspecified)
    pydef -> LEX (lexdef)
    | CODE (addcode)
    | CLASS; (classname)
    lhsdeflist -> lhsdeflist lhsdef (unspecified)
    | lhsdef; (unspecified)
    lhsdef -> ID COLON rhslist SCOLON; (lhsdef)
    rhslist -> rhs (singletolist)
    | rhslist OR rhs; (rhslist_OR_rhs)
    rhs -> rhsidlist (rhs_idlist)
    | rhsidlist LPAREN ID RPAREN; (rhs_idlist_func)
    rhsidlist -> idlist (unspecified)
    | ; (unspecified)
    idlist -> idlist ID (idl_idlistID)
    | ID; (idlistID)
    While parsing input, if one of the above productions is recognized,
    a method of your sub-class (whose name is indicated in parens to the
    right) will be invoked. Names marked 'unspecified' should be ignored.
    usage:
    class MyGrammarParser(GrammarParser):
        # ...define the methods for the productions...
    p = MyGrammarParser(); p.parse(text)
    """
    # NOTE(review): generated file -- regenerate with pgen.py instead of
    # editing this class or the tables above by hand.
    def __init__(self):
        # Bind the grammar-specification lexer to the shared LR engine,
        # driven by the module-level action/goto/prodinfo tables.
        lexer = PyLR.Lexers.GrammarLex()
        PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo)

75
PyLR/parsertemplate.py Normal file
View file

@ -0,0 +1,75 @@
#
# this file's doc string is used as a template for producing PyLRtables.py.
# PyLRtables.py contains the source code to produce the engine part of a
# parser.
#
# The %(...)s placeholders (filename, date, parsername, extrasource,
# symbols, actiontable, gototable, prodinfo, grammar, lexerinit) are
# filled in via %-formatting when the generator writes a parser module.
#
# NOTE(review): the class body inside this template appears to have lost
# its leading indentation -- as written, the emitted module would be
# syntactically invalid. Compare against generator output (gramnew.py)
# and confirm/restore the indentation of the template string.
#
'''\
"""
%(filename)s -- created %(date)s
This file was automatically generated by the PyLR parser generator.
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
tables are used to give functionality to a parsing engine. It also defines
A Parser class called %(parsername)s which will use this engine. It's usage
is indicated in %(parsername)s's doc-string.
"""
#
# this section contains source code added by the user
# plus 'import PyLR'
#
%(extrasource)s
import PyLR
#
# the action table
# 's' means shift
# ('r',<n>) means reduce with production n
# 'a' means accept
# '' means error
# each row represents a state and each column a terminal lookahead symbol
# (excluding symbols with Lexer.SKIPTOK of course).
# Lexer symbols are:
# %(symbols)s
#
_actiontable = %(actiontable)s
#
# the goto table, each row represents a state
# and each column, the nonterminal that was on the lhs of the
# reduction
#
_gototable = %(gototable)s
#
# This is the prodinfo table. each row represents a production
# the entries are the length of the production, the name of a method
# in an instance of the %(parsername)s class below that gets called
# when that production occurs, and the index of the lhs in the
# nonterminals (as in # the gototable)
#
_prodinfo = %(prodinfo)s
class %(parsername)s(PyLR.Parser.Parser):
"""
this class was produced automatically by the PyLR parser generator.
It is meant to be subclassed to produce a parser for the grammar
%(grammar)s
While parsing input, if one of the above productions is recognized,
a method of your sub-class (whose name is indicated in parens to the
right) will be invoked. Names marked 'unspecified' should be ignored.
usage:
class My%(parsername)s(%(parsername)s):
# ...define the methods for the productions...
p = My%(parsername)s(); p.parse(text)
"""
def __init__(self):
lexer = %(lexerinit)s
PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo)
'''

118
PyLR/pgen.py Normal file
View file

@ -0,0 +1,118 @@
#!/usr/bin/env python
import PyLR, PyLR.Grammar, sys, getopt
from PyLR.Parsers import GrammarParser
class ParserParser(GrammarParser):
    """Parser for PyLR grammar-specification files.

    The production-callback methods below (their names appear in
    parentheses in the grammar spec, see tstpspec) accumulate
    productions in self.result; parse() then builds an LALR grammar
    from them and writes the generated parser module to a file.
    """

    def __init__(self):
        GrammarParser.__init__(self)
        self.result = []        # PyLR.Grammar.Production objects, filled by lhsdef()
        self.funcmap = {}       # maps a rhs tuple -> callback method name
        self.usercode = ""      # verbatim user code collected from _code lines
        # NOTE(review): these two attributes shadow the lexdef() and
        # classname() methods below on instance-attribute lookup --
        # confirm how the Parser base class resolves callback names.
        self.lexdef = ""
        self.classname = "MyParser"
        self.idlist = []        # every identifier seen on a right-hand side

    def idlistID(self, id):
        "idlist -> id"
        self.idlist.append(id)
        return [id]

    def singletolist(self, el):
        "rhslist -> rhs"
        return [el]

    def idl_idlistID(self, l, el):
        "idlist -> idlist id"
        # BUGFIX: was self.idlist.append(id), which recorded the builtin
        # id() function object instead of the new identifier.
        self.idlist.append(el)
        l.append(el)
        return l

    def rhs_idlist(self, l):
        "rhs -> idlist"
        return l

    def rhseps(self):
        "rhseps -> "
        return []

    def rhs_idlist_func(self, l, lp, id, rp):
        "rhs -> idlist LPAREN ID RPAREN"
        # remember which callback method handles this right-hand side
        self.funcmap[tuple(l)] = id
        return l

    def rhslist_OR_rhs(self, l, OR, el):
        "rhslist -> rhslist OR rhs"
        l.append(el)
        return l

    def lhsdef(self, lhs, COLON, rhslist, SCOLON):
        "lhsdef -> ID COLON rhslist SCOLON"
        print(lhs)
        # one Production per alternative on the right-hand side
        for rhs in rhslist:
            self.result.append(PyLR.Grammar.Production(lhs, rhs))
        return None

    def lexdef(self, ld):
        # callback for "_lex <expr>": remember the lexer construction text
        self.lexdef = ld

    def addcode(self, code):
        # callback for "_code <line>": collect verbatim user code
        self.usercode = self.usercode + "\n" + code

    def classname(self, name):
        # callback for "_class <name>": name of the generated parser class
        self.classname = name

    def parse(self, text, outf, verbose=0):
        """Parse a grammar spec and write the generated parser to outf.

        text: grammar-spec source; outf: output filename;
        verbose: passed through to the base parser.
        """
        global g, toks, lexer
        PyLR.Parser.Parser.parse(self, text, verbose)
        # insert the functionnames
        for p in self.result:
            funcname = self.funcmap.get(tuple(p.RHS), "unspecified")
            p.setfuncname(funcname)
        # evaluate the lexer
        # SECURITY: exec/eval run arbitrary code taken from the spec
        # file -- only process trusted grammar specs.
        exec(self.usercode)
        lexer = eval(self.lexdef)
        # generate the tokens for grammar
        toks = lexer.getTokenList()
        # change the symbols to their numbers
        for p in self.result:
            for si in range(len(p.RHS)):
                if p.RHS[si] in toks:
                    p.RHS[si] = toks.indexof(p.RHS[si])
        g = PyLR.Grammar.LALRGrammar(self.result, toks)
        print(g)
        g.extrasource = self.usercode
        print("done parsing, about to start parser generation (writing to %s)" % outf)
        if self.lexdef:
            g.writefile(outf, self.classname, self.lexdef)
        else:
            g.writefile(outf, self.classname)
        print("done")
def main():
    """Command-line entry point: read a grammar spec, generate a parser.

    usage: pgen.py infile outfile   (an infile of "-" reads from stdin)
    """
    usage = "pgen.py infile outfile"
    args = sys.argv[1:]
    if len(args) != 2:
        print(usage)
        sys.exit(0)
    infile, outfile = args
    # "-" selects stdin; anything else is opened as an ordinary file
    if infile == "-":
        source = sys.stdin
    else:
        source = open(infile)
    spec = source.read()
    # source is deliberately left open: it may be stdin
    global pp  # module-global for use with python -i pgen.py <inf> <outf>
    pp = ParserParser()
    pp.parse(spec, outfile, 1)


if __name__ == "__main__":
    main()

28
PyLR/sedscript Normal file
View file

@ -0,0 +1,28 @@
# sed script that instantiates Makefile.pre.in: it substitutes the
# autoconf-style @VAR@ placeholders with concrete Linux/gcc build
# settings and rewrites the installation-path assignments.
# ([@] in the patterns keeps the script from matching its own text.)
1i\
# Generated automatically from Makefile.pre.in by sedscript.
s%@VERSION[@]%1.5%
s%#@SET_CCC[@]%CCC=g++%
s%@CC[@]%gcc%
s%@RANLIB[@]%ranlib%
s%@OPT[@]%-g -O2%
s%@LDFLAGS[@]%%
s%@DEFS[@]%-DHAVE_CONFIG_H%
s%@LIBS[@]%-lieee -ldl -lpthread%
s%@LIBM[@]%-lm%
s%@LIBC[@]%%
s%@MACHDEP[@]%linux2%
s%^prefix=.*%prefix= /usr%
s%^exec_prefix=.*%exec_prefix= ${prefix}%
s%@SO[@]%.so%
s%@LDSHARED[@]%gcc -shared -lc%
s%@CCSHARED[@]%-fPIC%
s%@LINKFORSHARED[@]%-Xlinker -export-dynamic%
s%@LINKCC[@]%$(CC)%
# safety net: drop any SET_CCC line the substitution above did not rewrite
/^#@SET_CCC@/d
# force the install locations regardless of what the template says
/^installdir=/s%=.*%= /usr%
/^exec_installdir=/s%=.*%=/usr%
/^srcdir=/s%=.*%= .%
/^VPATH=/s%=.*%= .%
/^LINKPATH=/s%=.*%= %
/^BASELIB=/s%=.*%= %
/^BASESETUP=/s%=.*%= %

51
PyLR/tstpspec Normal file
View file

@ -0,0 +1,51 @@
#
# this is a Grammar Spec for parsing PyLR style
# Grammars
#
#
# this is the pydefs section, where you name the output class,
# add code, and state how to initialize the lexer
#
_class GrammarParser
_code import PyLR.Lexers
_code import PyLR.Parser
_lex PyLR.Lexers.GrammarLex()
#
# this is the Grammar spec part, where you specify
# the productions and optionally their corresponding
# method names in the generated Parser class (or subclasses
# of it)
#
"""
pspec: gspec |
pydefs gspec;
gspec: GDEL lhsdeflist GDEL;
pydefs: pydefs pydef |
pydef;
pydef: LEX (lexdef) |
CODE (addcode) |
CLASS (classname);
lhsdeflist: lhsdeflist lhsdef |
lhsdef;
lhsdef: ID COLON rhslist SCOLON (lhsdef);
rhslist: rhs (singletolist) |
rhslist OR rhs (rhslist_OR_rhs);
rhs: rhsidlist (rhs_idlist) |
rhsidlist LPAREN ID RPAREN (rhs_idlist_func);
rhsidlist: idlist
| (rhseps);
idlist: idlist ID (idl_idlistID) |
ID (idlistID);
"""

25
README Normal file
View file

@ -0,0 +1,25 @@
LinkChecker
=============
With LinkChecker you can check your HTML documents for broken links.
Features:
o recursive checking
o multithreaded
o output can be colored or normal text, HTML, SQL or a GML sitemap graph
o HTTP, FTP, mailto:, Gopher, Telnet and local file links are supported
Javascript and HTTPS links are currently ignored
o restrict link checking to your local domain
o HTTP proxy support
o give username/password for HTTP and FTP authorization
o robots.txt exclusion protocol support
LinkChecker is licensed under the GNU General Public License.
Credits go to Guido van Rossum for making Python. His hovercraft is
full of eels!
As this program is directly derived from my Java link checker, additional
credits go to Robert Forsman (the author of JCheckLinks) and his
robots.txt parse algorithm.
I want to thank everybody who gave me feedback, bug reports and
suggestions.

84
README.dns Normal file
View file

@ -0,0 +1,84 @@
Release 2.2, Mon Apr 27 22:59:16 EST 1998
This is a test release of the DNS code, as originally written by
Guido van Rossum, and with a hopefully nicer API bolted over the
top of it by Anthony Baxter <arb@connect.com.au>. It's also in a
python 1.5 package.
There are several known bugs/unfinished bits
- processing of AXFR results is not done yet.
- something I've done recently has broken the DnsAsyncRequest(). Bummer.
- doesn't do IPv6 DNS requests (type AAAA) (as per [RFC 1886])
- docs, aside from this file
- all sorts of other stuff that I've probably forgotten.
Stuff it _does_ do:
processes /etc/resolv.conf - at least as far as nameserver directives go.
tries multiple nameservers.
nicer API - see below.
returns results in more useful format.
optional timing of requests.
default 'show' behaviour emulates 'dig' pretty closely.
support for asyncore.py ### NOTE: currently broken a bit.
To use:
import DNS
reqobj=DNS.Request(args)
reqobj.req(args)
args can be a name, in which case it takes that as the query, and/or a series
of keyword/value args. (see below for a list of args)
when calling the 'req()' method, it reuses the options specified in the
DNS.Request() call as defaults.
options are applied in the following order:
those specified in the req() call
or, if not specified there,
those specified in the creation of the Request() object
or, if not specified there,
those specified in the DNS.defaults dictionary
name servers can be specified in the following ways:
by calling DNS.ParseResolvConf(), which will load the DNS.servers
from the system's /etc/resolv.conf file
by specifying it as an option to the request
by manually setting DNS.defaults['server'] to a list of server IP
addresses to try
XXXX It should be possible to load the DNS servers on a windows or
mac box, from where-ever they've squirrelled them away
name="host.do.main" # the object being looked up
qtype="SOA" # the query type, eg SOA, A, MX, CNAME, ANY
protocol="udp" # "udp" or "tcp" - usually you want "udp"
server="nameserver" # the name of the nameserver. Note that you might
# want to use an IP address here
rd=1 # "recursion desired" - defaults to 1.
other: opcode, port, ...
There's also some convenience functions, for the lazy:
to do a reverse lookup:
>>> print DNS.revlookup("192.189.54.17")
yarrina.connect.com.au
to look up all MX records for an entry:
>>> print DNS.mxlookup("connect.com.au")
[(10, 'yarrina.connect.com.au'), (100, 'warrane.connect.com.au')]
Documentation of the rest of the interface will have to wait for a
later date. Note that the DnsAsyncRequest stuff is currently not
working - I haven't looked too closely at why, yet.
There's some examples in the tests/ directory - including test5.py,
which is even vaguely useful. It looks for the SOA for a domain, checks
that the primary NS is authoritative, then checks the nameservers
that it believes are NSs for the domain and checks that they're
authoritative, and that the zone serial numbers match.
see also README.guido for the original docs.
comments to me - arb@connect.com.au

136
StringUtil.py Normal file
View file

@ -0,0 +1,136 @@
import string,re
# Translation tables used by applyTable(): ordered lists of
# (substring, replacement) pairs, applied first to last.

# Plain text -> HTML character entities.
# The ("&", "&amp;") rule MUST come first: every other replacement
# introduces new "&" characters (entity prefixes) which a later "&"
# rule would corrupt into "&amp;auml;" etc.
HtmlTable = [
    ("&", "&amp;"),
    ("ä", "&auml;"),
    ("ö", "&ouml;"),
    ("ü", "&uuml;"),
    ("Ä", "&Auml;"),
    ("Ö", "&Ouml;"),
    ("Ü", "&Uuml;"),
    ("ß", "&szlig;"),
    ("<", "&lt;"),
    (">", "&gt;"),
    ("é", "&eacute;"),
    ("è", "&egrave;"),  # was "&egrave" -- entities need the closing ";"
]

# SQL string-literal escaping: double every embedded single quote.
SQLTable = [
    ("'", "''"),
]
def stripHtmlComments(data):
    """Return data with every complete <!-- ... --> comment removed.

    An unterminated comment (no closing "-->") is left in place.
    Rewritten with str methods: the string-module functions used before
    (string.find) were removed in Python 3.
    """
    i = data.find("<!--")
    while i != -1:
        j = data.find("-->", i)
        if j == -1:
            # opening marker without a terminator: keep the rest as-is
            break
        data = data[:i] + data[j + 3:]
        i = data.find("<!--")
    return data
def stripFenceComments(data):
    """Return data without '#' comment lines.

    A comment line is one whose first non-whitespace character is '#'.
    Kept lines are re-joined with newlines; if every line was a comment
    the result is None (preserving the original contract).
    BUGFIX: the accumulator is now compared with "is None" instead of
    truthiness, so an empty first kept line is no longer dropped.
    """
    ret = None
    for line in data.split("\n"):
        if re.match(r"\s*#.*", line):
            continue
        if ret is None:
            ret = line
        else:
            ret = ret + "\n" + line
    return ret
def rstripQuotes(s):
    """Return s without a single trailing quote character, if present."""
    if s and s[-1] in "\"'":
        return s[:-1]
    return s
def lstripQuotes(s):
    """Return s without a single leading quote character, if present."""
    if s and s[0] in "\"'":
        return s[1:]
    return s
def stripQuotes(s):
    """Return s with one leading and/or one trailing quote removed.

    Strings shorter than two characters are returned unchanged; the
    two quote characters need not match each other.
    """
    if len(s) < 2:
        return s
    if s[:1] in ("\"", "'"):
        s = s[1:]
    if s[-1:] in ("\"", "'"):
        s = s[:-1]
    return s
def indent(s, level):
    """Indent the continuation lines of s by level spaces (see indentWith)."""
    return indentWith(s, " " * level)
def indentWith(s, indent):
    """Insert indent after every newline of s that is followed by text.

    The first line is never indented, and nothing is appended after a
    trailing newline.
    """
    parts = s.split("\n")
    out = parts[0]
    last = len(parts) - 1
    for k in range(1, len(parts)):
        out = out + "\n"
        # a final empty part means s ended with "\n": nothing follows it
        if parts[k] or k < last:
            out = out + indent + parts[k]
    return out
def blocktext(s, width):
    """Re-wrap s so that no output line is wider than width characters.

    BUGFIX: input lines are now consumed in document order -- the
    original popped from the END of the line list, which reversed the
    text.  Overlong lines are broken at the last word boundary before
    width.
    NOTE(review): input lines are still concatenated with no separator
    (so the last word of one line fuses with the first word of the
    next) -- preserved as-is, but confirm that this is intended.
    """
    pending = s.split("\n")
    pending.reverse()  # so pop() yields the lines in document order
    line = ""
    ret = ""
    while pending:
        line = line + pending.pop()
        while len(line) > width:
            i = getLastWordBoundary(line, width)
            ret = ret + line[:i].strip() + "\n"
            line = line[i:].strip()
    return ret + line


def getLastWordBoundary(s, width):
    """Return the largest i with 0 < i < width such that s[i-1] is whitespace.

    If s[:width] contains no whitespace, width - 1 is returned.
    """
    match = re.match(r".*\s", s[:width])
    if match:
        return match.end()
    return width - 1
def applyTable(table, str):
    """Apply every (old, new) substring replacement in table to str, in order.

    Rewritten with the str.replace method: string.replace() was removed
    in Python 3.  (The parameter name "str" shadows the builtin but is
    kept -- it is part of the function's keyword interface.)
    """
    for old, new in table:
        str = str.replace(old, new)
    return str
def texify(str):
    # NOTE(review): TexTable is not defined anywhere in this module, so
    # calling texify() raises NameError -- confirm where TexTable is
    # expected to come from (a missing TeX escape table?).
    return applyTable(TexTable, str)
def sqlify(str):
    """Quote str as an SQL string literal; a false value becomes NULL."""
    if str:
        return "'" + applyTable(SQLTable, str) + "'"
    return "NULL"
def htmlify(str):
    """Escape str for HTML output via the HtmlTable entity mapping."""
    return applyTable(HtmlTable, str)
def getLineNumber(str, index):
    """Return the 1-based line number of offset index within str.

    A negative index is treated as offset 0 (line 1).
    """
    line = 1
    for pos in range(max(index, 0)):
        if str[pos] == "\n":
            line = line + 1
    return line

10
TODO Normal file
View file

@ -0,0 +1,10 @@
Use leading '_' for private functions.
Is there a way to cleanly stop arbitrary Thread objects
(with exit handler)? Mail me solutions!
Patch the PyLR parser module to suit my needs.
Write a graph layout algorithm.
Write a little tool to produce an image of the GML output.

18
create.sql Normal file
View file

@ -0,0 +1,18 @@
-- tested with postgresql
-- Table for LinkChecker's SQL log output: one row per checked link.
-- NOTE(review): "drop table" fails when linkdb does not exist yet, and
-- "infoString" is mixed-case unlike every other column -- confirm both
-- are intended.
drop table linkdb;
create table linkdb (
    urlname varchar(50) not null,
    recursionlevel int not null,
    parentname varchar(50),
    baseref varchar(50),
    errorstring varchar(50),
    validstring varchar(50),
    warningstring varchar(50),
    infoString varchar(150),
    valid int,
    url varchar(50),
    line int,
    cached int
);

1
debian/.cvsignore vendored Normal file
View file

@ -0,0 +1 @@
tmp

107
debian/changelog vendored Normal file
View file

@ -0,0 +1,107 @@
pylice (0.9.0) unstable; urgency=low
* See ChangeLog
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 21 Feb 2000 16:26:22 +0100
pylice (0.8.0) unstable; urgency=low
* See ChangeLog
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Thu, 10 Feb 2000 21:32:55 +0000
pylice (0.7.0) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Thu, 27 Jan 2000 23:15:24 +0100
pylice (0.6.2) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Wed, 26 Jan 2000 11:41:28 +0100
pylice (0.6.1) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 25 Jan 2000 21:11:15 +0100
pylice (0.6.0) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Wed, 19 Jan 2000 00:25:55 +0100
pylice (0.5.0) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 18 Jan 2000 00:39:31 +0100
pylice (0.4.4) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 17 Jan 2000 12:21:10 +0100
pylice (0.4.3) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Fri, 14 Jan 2000 02:10:20 +0100
pylice (0.4.2) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Thu, 13 Jan 2000 21:48:23 +0100
pylice (0.4.1) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Wed, 12 Jan 2000 13:34:42 +0100
pylice (0.4.0) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 11 Jan 2000 13:48:53 +0100
pylice (0.3.0) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 11 Jan 2000 00:01:37 +0100
pylice (0.2.1) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 10 Jan 2000 22:01:54 +0100
pylice (0.2.0) unstable; urgency=low
* See CHANGES
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 10 Jan 2000 21:28:38 +0100
pylice (0.1.0) unstable; urgency=low
* New release. Missing features: robots.txt exclusion standard,
proxy setting, user/password setting and
mailto:, telnet:, gopher: link checking
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Sun, 9 Jan 2000 14:32:15 +0100
pylice (0.0.1) unstable; urgency=low
* Initial Release.
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Sat, 8 Jan 2000 11:00:35 +0100
Local variables:
mode: debian-changelog
End:

21
debian/control vendored Normal file
View file

@ -0,0 +1,21 @@
Source: linkchecker
Section: web
Priority: optional
Maintainer: Bastian Kleineidam <calvin@cs.uni-sb.de>
Standards-Version: 3.0.1
Package: linkchecker
Architecture: any
Depends: python-base
Description: LinkChecker is a link checker written in Python
With LinkChecker you can check your HTML documents for broken links.
Features:
o recursive checking
o multithreaded
o output can be colored or normal text, HTML, SQL or a GML sitemap graph
o HTTP, FTP, mailto:, Gopher, Telnet and local file links are supported
Javascript and HTTPS links are currently ignored
o restrict link checking to your local domain
o HTTP proxy support
o give username/password for HTTP and FTP authorization
o robots.txt exclusion protocol support

22
debian/copyright vendored Normal file
View file

@ -0,0 +1,22 @@
This is linkchecker, written and maintained by Bastian Kleineidam <calvin@cs.uni-sb.de>
on Sat, 8 Jan 2000 11:00:35 +0100.
The original source can always be found at:
http://linkchecker.sourceforge.net
Copyright (C) 2000 Bastian Kleineidam
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License with
the Debian GNU/Linux distribution in file /usr/doc/copyright/GPL;
if not, write to the Free Software Foundation, Inc., 59 Temple Place,
Suite 330, Boston, MA 02111-1307 USA

0
debian/dirs vendored Normal file
View file

4
debian/docs vendored Normal file
View file

@ -0,0 +1,4 @@
INSTALL
README
README.dns
ChangeLog

22
debian/ex.doc-base.package vendored Normal file
View file

@ -0,0 +1,22 @@
Document: pylice
Title: Debian pylice Manual
Author: <insert document author here>
Abstract: This manual describes what pylice is
and how it can be used to
manage online manuals on Debian systems.
Section: unknown
Format: debiandoc-sgml
Files: /usr/doc/pylice/pylice.sgml.gz
Format: postscript
Files: /usr/doc/pylice/pylice.ps.gz
Format: text
Files: /usr/doc/pylice/pylice.text.gz
Format: HTML
Index: /usr/doc/pylice/html/index.html
Files: /usr/doc/pylice/html/*.html

1
debian/files vendored Normal file
View file

@ -0,0 +1 @@
pylice_0.8.0_i386.deb web optional

70
debian/init.d.ex vendored Normal file
View file

@ -0,0 +1,70 @@
#! /bin/sh
#
# skeleton example file to build /etc/init.d/ scripts.
# This file should be used to construct scripts for /etc/init.d.
#
# Written by Miquel van Smoorenburg <miquels@cistron.nl>.
# Modified for Debian GNU/Linux
# by Ian Murdock <imurdock@gnu.ai.mit.edu>.
#
# Version: @(#)skeleton 1.8 03-Mar-1998 miquels@cistron.nl
#
# This file was automatically customized by dh-make on Sat, 8 Jan 2000 11:00:35 +0100
#
# NOTE(review): apparently the untouched dh-make skeleton (.ex suffix);
# /usr/sbin/pylice is not a daemon this package installs, so the script
# exits at the "test -f $DAEMON" guard -- confirm it is not shipped.

PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
DAEMON=/usr/sbin/pylice
NAME=pylice
DESC=pylice

# bail out silently when the daemon binary is not installed
test -f $DAEMON || exit 0

set -e

case "$1" in
  start)
	echo -n "Starting $DESC: "
	start-stop-daemon --start --quiet --pidfile /var/run/$NAME.pid \
		--exec $DAEMON
	echo "$NAME."
	;;
  stop)
	echo -n "Stopping $DESC: "
	start-stop-daemon --stop --quiet --pidfile /var/run/$NAME.pid \
		--exec $DAEMON
	echo "$NAME."
	;;
  #reload)
	#
	#	If the daemon can reload its config files on the fly
	#	for example by sending it SIGHUP, do it here.
	#
	#	If the daemon responds to changes in its config file
	#	directly anyway, make this a do-nothing entry.
	#
	# echo "Reloading $DESC configuration files."
	# start-stop-daemon --stop --signal 1 --quiet --pidfile \
	#	/var/run/$NAME.pid --exec $DAEMON
  #;;
  restart|force-reload)
	#
	#	If the "reload" option is implemented, move the "force-reload"
	#	option to the "reload" entry above. If not, "force-reload" is
	#	just the same as "restart".
	#
	echo -n "Restarting $DESC: "
	start-stop-daemon --stop --quiet --pidfile \
		/var/run/$NAME.pid --exec $DAEMON
	sleep 1
	start-stop-daemon --start --quiet --pidfile \
		/var/run/$NAME.pid --exec $DAEMON
	echo "$NAME."
	;;
  *)
	N=/etc/init.d/$NAME
	# echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
	echo "Usage: $N {start|stop|restart|force-reload}" >&2
	exit 1
	;;
esac

exit 0

60
debian/manpage.1.ex vendored Normal file
View file

@ -0,0 +1,60 @@
.\" Hey, EMACS: -*- nroff -*-
.\" First parameter, NAME, should be all caps
.\" Second parameter, SECTION, should be 1-8, maybe w/ subsection
.\" other parameters are allowed: see man(7), man(1)
.TH PYLICE SECTION "January 8, 2000"
.\" Please adjust this date whenever revising the manpage.
.\"
.\" Some roff macros, for reference:
.\" .nh disable hyphenation
.\" .hy enable hyphenation
.\" .ad l left justify
.\" .ad b justify to both left and right margins
.\" .nf disable filling
.\" .fi enable filling
.\" .br insert line break
.\" .sp <n> insert n+1 empty lines
.\" for manpage-specific macros, see man(7)
.SH NAME
pylice \- program to do something
.SH SYNOPSIS
.B pylice
.RI [ options ] " files" ...
.br
.B bar
.RI [ options ] " files" ...
.SH DESCRIPTION
This manual page documents briefly the
.B pylice
and
.B bar
commands.
This manual page was written for the Debian GNU/Linux distribution
because the original program does not have a manual page.
Instead, it has documentation in the GNU Info format; see below.
.PP
.\" TeX users may be more comfortable with the \fB<whatever>\fP and
.\" \fI<whatever>\fP escape sequences to invode bold face and italics,
.\" respectively.
\fBpylice\fP is a program that...
.SH OPTIONS
These programs follow the usual GNU command line syntax, with long
options starting with two dashes (`-').
A summary of options is included below.
For a complete description, see the Info files.
.TP
.B \-h, \-\-help
Show summary of options.
.TP
.B \-v, \-\-version
Show version of program.
.SH SEE ALSO
.BR bar (1),
.BR baz (1).
.br
The programs are documented fully by
.IR "The Rise and Fall of a Fooish Bar" ,
available via the Info system.
.SH AUTHOR
This manual page was written by Bastian Kleineidam <calvin@mail.calvinsplayground.de>,
for the Debian GNU/Linux system (but may be used by others).

2
debian/menu.ex vendored Normal file
View file

@ -0,0 +1,2 @@
?package(linkchecker):needs=X11|text|vc|wm section=Apps/see-menu-manual\
title="linkchecker" command="/usr/bin/linkchecker"

7
debian/postinst.debhelper vendored Normal file
View file

@ -0,0 +1,7 @@
# Automatically added by dh_installdocs
# Back-compat symlink: make the old /usr/doc location point at the
# real docs under /usr/share/doc (only when nothing occupies it yet).
if [ "$1" = "configure" ]; then
	if [ -d /usr/doc -a ! -e /usr/doc/pylice -a -d /usr/share/doc/pylice ]; then
		ln -sf ../share/doc/pylice /usr/doc/pylice
	fi
fi
# End automatically added section

47
debian/postinst.ex vendored Normal file
View file

@ -0,0 +1,47 @@
#! /bin/sh
# postinst script for pylice
#
# see: dh_installdeb(1)
# NOTE(review): unmodified dh-make template (.ex suffix) -- every case
# is empty; rename to "postinst" only if real configure actions are added.

set -e

# summary of how this script can be called:
#        * <postinst> `configure' <most-recently-configured-version>
#        * <old-postinst> `abort-upgrade' <new version>
#        * <conflictor's-postinst> `abort-remove' `in-favour' <package>
#          <new-version>
#        * <deconfigured's-postinst> `abort-deconfigure' `in-favour'
#          <failed-install-package> <version> `removing'
#          <conflicting-package> <version>
# for details, see /usr/doc/packaging-manual/
#
# quoting from the policy:
#     Any necessary prompting should almost always be confined to the
#     post-installation script, and should be protected with a conditional
#     so that unnecessary prompting doesn't happen if a package's
#     installation fails and the `postinst' is called with `abort-upgrade',
#     `abort-remove' or `abort-deconfigure'.

case "$1" in
    configure)
    ;;

    abort-upgrade|abort-remove|abort-deconfigure)
    ;;

    *)
        echo "postinst called with unknown argument \`$1'" >&2
        exit 0
    ;;
esac

# dh_installdeb will replace this with shell code automatically
# generated by other debhelper scripts.

#DEBHELPER#

exit 0

38
debian/postrm.ex vendored Normal file
View file

@ -0,0 +1,38 @@
#! /bin/sh
# postrm script for pylice
#
# see: dh_installdeb(1)
# NOTE(review): unmodified dh-make template (.ex suffix); the
# update-menus hook below is still commented out.

set -e

# summary of how this script can be called:
#        * <postrm> `remove'
#        * <postrm> `purge'
#        * <old-postrm> `upgrade' <new-version>
#        * <new-postrm> `failed-upgrade' <old-version>
#        * <new-postrm> `abort-install'
#        * <new-postrm> `abort-install' <old-version>
#        * <new-postrm> `abort-upgrade' <old-version>
#        * <disappearer's-postrm> `disappear' <r>overwrit>r> <new-version>
# for details, see /usr/doc/packaging-manual/

case "$1" in
    purge|remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear)
        # update the menu system
        # if [ -x /usr/bin/update-menus ]; then update-menus; fi
        ;;
    *)
        echo "postrm called with unknown argument \`$1'" >&2
        exit 0
esac

# dh_installdeb will replace this with shell code automatically
# generated by other debhelper scripts.

#DEBHELPER#
40
debian/preinst.ex vendored Normal file
View file

@ -0,0 +1,40 @@
#! /bin/sh
# preinst script for pylice
#
# see: dh_installdeb(1)
# NOTE(review): unmodified dh-make template (.ex suffix); the
# daemon-stop block is still commented out.

set -e

# summary of how this script can be called:
#        * <new-preinst> `install'
#        * <new-preinst> `install' <old-version>
#        * <new-preinst> `upgrade' <old-version>
#        * <old-preinst> `abort-upgrade' <new-version>

case "$1" in
    install|upgrade)
#        if [ "$1" = "upgrade" ]
#        then
#            start-stop-daemon --stop --quiet --oknodo \
#                --pidfile /var/run/pylice.pid \
#                --exec /usr/sbin/pylice 2>/dev/null || true
#        fi
        ;;

    abort-upgrade)
        ;;

    *)
        echo "preinst called with unknown argument \`$1'" >&2
        exit 0
        ;;
esac

# dh_installdeb will replace this with shell code automatically
# generated by other debhelper scripts.

#DEBHELPER#

exit 0

5
debian/prerm.debhelper vendored Normal file
View file

@ -0,0 +1,5 @@
# Automatically added by dh_installdocs
# Remove the /usr/doc compatibility symlink on upgrade or removal.
if [ \( "$1" = "upgrade" -o "$1" = "remove" \) -a -L /usr/doc/pylice ]; then
	rm -f /usr/doc/pylice
fi
# End automatically added section

37
debian/prerm.ex vendored Normal file
View file

@ -0,0 +1,37 @@
#! /bin/sh
# prerm script for pylice
#
# see: dh_installdeb(1)
# NOTE(review): unmodified dh-make template (.ex suffix); the
# install-info removal is still commented out.

set -e

# summary of how this script can be called:
#        * <prerm> `remove'
#        * <old-prerm> `upgrade' <new-version>
#        * <new-prerm> `failed-upgrade' <old-version>
#        * <conflictor's-prerm> `remove' `in-favour' <package> <new-version>
#        * <deconfigured's-prerm> `deconfigure' `in-favour'
#          <package-being-installed> <version> `removing'
#          <conflicting-package> <version>
# for details, see /usr/doc/packaging-manual/

case "$1" in
    remove|upgrade|deconfigure)
#        install-info --quiet --remove /usr/info/pylice.info.gz
        ;;
    failed-upgrade)
        ;;
    *)
        echo "prerm called with unknown argument \`$1'" >&2
        exit 0
        ;;
esac

# dh_installdeb will replace this with shell code automatically
# generated by other debhelper scripts.

#DEBHELPER#

exit 0

76
debian/rules vendored Executable file
View file

@ -0,0 +1,76 @@
#!/usr/bin/make -f
# Sample debian/rules that uses debhelper.
# GNU copyright 1997 to 1999 by Joey Hess.
#
# Builds the linkchecker Debian package with debhelper (compat level 1):
# "build" runs the upstream Makefile, "install" stages into debian/tmp,
# and "binary-arch" assembles the .deb.
# NOTE(review): recipe lines below must be TAB-indented (Makefile rule);
# the source dump had its whitespace stripped -- verify after applying.

# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1

# This is the debhelper compatability version to use.
export DH_COMPAT=1

build: build-stamp
build-stamp:
	dh_testdir
	# Add here commands to compile the package.
	$(MAKE)
	touch build-stamp

clean:
	dh_testdir
#	dh_testroot
	rm -f build-stamp
	# Add here commands to clean up after the build process.
	-$(MAKE) clean
	dh_clean

install: build
	dh_testdir
#	dh_testroot
	dh_clean -k
#	dh_installdirs
	# Add here commands to install the package into debian/tmp.
	$(MAKE) install DESTDIR=`pwd`/debian/tmp

# Build architecture-independent files here.
binary-indep: build install
	# We have nothing to do by default.

# Build architecture-dependent files here.
binary-arch: build install
#	dh_testversion
	dh_testdir
#	dh_testroot
#	dh_installdebconf
	dh_installdocs
#	dh_installexamples
#	dh_installmenu
#	dh_installemacsen
#	dh_installpam
#	dh_installinit
#	dh_installcron
#	dh_installmanpages
#	dh_installinfo
	dh_undocumented
#	dh_installchangelogs
#	dh_link
#	dh_strip
	dh_compress
	dh_fixperms
	# You may want to make some executables suid here.
#	dh_suidregister
#	dh_makeshlibs
	dh_installdeb
#	dh_perl
#	dh_shlibdeps
	dh_gencontrol
	dh_md5sums
	dh_builddeb

binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install

5
debian/watch.ex vendored Normal file
View file

@ -0,0 +1,5 @@
# Example watch control file for uscan
# Rename this file to "watch" and then you can run the "uscan" command
# to check for upstream updates and more.
# Site Directory Pattern Version Script
sunsite.unc.edu /pub/Linux/Incoming pylice-(.*)\.tar\.gz debian uupdate

382
httplib.py Normal file
View file

@ -0,0 +1,382 @@
#
# HTTP/1.1 client library
#
# Copyright (C) 1998-1999 Guido van Rossum. All Rights Reserved.
# Written by Greg Stein. Given to Guido. Licensed using the Python license.
#
# This module is maintained by Greg and is available at:
# http://www.lyra.org/greg/python/httplib.py
#
# Since this isn't in the Python distribution yet, we'll use the CVS ID
# for tracking:
# $Id$
#
import socket,string,mimetools,httplib
# Module exception as a string (Python 1.5-era idiom), e.g. "httplib.error".
error = __name__ + '.error'

# Default TCP port for HTTP connections.
HTTP_PORT = 80
class HTTPResponse(mimetools.Message):
    """An HTTP response: RFC-822 headers plus a body read through read().

    __init__ derives from the headers whether the body is chunked,
    whether a content length is known, and whether the connection will
    close when the body has been consumed; read() uses that state.
    """

    def __init__(self, fp, version, errcode):
        # fp: file object wrapping the socket; version: HTTP version
        # string from the status line; errcode: numeric status code.
        mimetools.Message.__init__(self, fp, 0)

        if version == 'HTTP/1.0':
            self.version = 10
        elif version[:7] == 'HTTP/1.':
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise error, 'unknown HTTP protocol'

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.getheader('transfer-encoding')
        if tr_enc:
            if string.lower(tr_enc) != 'chunked':
                raise error, 'unknown transfer-encoding'
            self.chunked = 1
            self.chunk_left = None   # bytes left in the current chunk
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        conn = self.getheader('connection')
        if conn:
            conn = string.lower(conn)
            # a "Connection: close" will always close the connection. if we
            # don't see that and this is not HTTP/1.1, then the connection will
            # close unless we see a Keep-Alive header.
            self.will_close = string.find(conn, 'close') != -1 or \
                              ( self.version != 11 and \
                                not self.getheader('keep-alive') )
        else:
            # for HTTP/1.1, the connection will always remain open
            # otherwise, it will remain open IFF we see a Keep-Alive header
            self.will_close = self.version != 11 and \
                              not self.getheader('keep-alive')

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 states we ignore this if tr_enc is "chunked"
        length = self.getheader('content-length')
        if length and not self.chunked:
            self.length = int(length)
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (errcode == 204 or            # No Content
            errcode == 304 or            # Not Modified
            100 <= errcode < 200):       # 1xx codes
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def close(self):
        # Release the underlying file object; safe to call repeatedly.
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        # True once the body has been fully consumed and close() ran.
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Read and return up to amt bytes of the body (all of it if amt
        is None), decoding chunked transfer-encoding transparently."""
        if not self.fp:
            return ''

        if self.chunked:
            chunk_left = self.chunk_left
            value = ''
            while 1:
                if not chunk_left:
                    # at a chunk boundary: parse the next chunk-size line
                    line = self.fp.readline()
                    i = string.find(line, ';')
                    if i >= 0:
                        line = line[:i]  # strip chunk-extensions
                    chunk_left = string.atoi(line, 16)
                    if chunk_left == 0:
                        break   # last-chunk marker: body is complete
                if not amt:
                    value = value + self.fp.read(chunk_left)
                elif amt < chunk_left:
                    value = value + self.fp.read(amt)
                    self.chunk_left = chunk_left - amt
                    return value
                elif amt == chunk_left:
                    value = value + self.fp.read(amt)
                    self.fp.read(2)  # toss the CRLF at the end of the chunk
                    self.chunk_left = None
                    return value
                else:
                    value = value + self.fp.read(chunk_left)
                    amt = amt - chunk_left

                # we read the whole chunk, get another
                self.fp.read(2)  # toss the CRLF at the end of the chunk
                chunk_left = None

            # read and discard trailer up to the CRLF terminator
            ### note: we shouldn't have any trailers!
            while 1:
                line = self.fp.readline()
                if line == '\r\n':
                    break

            # we read everything; close the "file"
            self.close()

            return value

        elif not amt:
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self.fp.read(self.length)
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length
            self.length = self.length - amt

        s = self.fp.read(amt)

        # close our "file" if we know we should
        ### I'm not sure about the len(s) < amt part; we should be safe because
        ### we shouldn't be using non-blocking sockets
        if self.length == 0 or len(s) < amt:
            self.close()

        return s
class HTTPConnection:
_http_vsn = 11
_http_vsn_str = 'HTTP/1.1'
response_class = HTTPResponse
def __init__(self, host, port=None):
self.sock = None
self.response = None
self._set_hostport(host, port)
def _set_hostport(self, host, port):
if port is None:
i = string.find(host, ':')
if i >= 0:
port = int(host[i+1:])
host = host[:i]
else:
port = HTTP_PORT
self.host = host
self.port = port
def connect(self):
"""Connect to the host and port specified in __init__."""
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.sock.connect(self.host, self.port)
def close(self):
"""Close the connection to the HTTP server."""
if self.sock:
self.sock.close() # close it manually... there may be other refs
self.sock = None
if self.response:
self.response.close()
self.response = None
def send(self, str):
"""Send `str' to the server."""
if not self.sock:
self.connect()
# send the data to the server. if we get a broken pipe, then close
# the socket. we want to reconnect when somebody tries to send again.
#
# NOTE: we DO propagate the error, though, because we cannot simply
# ignore the error... the caller will know if they can retry.
try:
self.sock.send(str)
except socket.error, v:
if v[0] == 32: # Broken pipe
self.close()
raise
def putrequest(self, method, url='/'):
    """Send a request to the server.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g.
    '/index.html'.

    Raises `error' if a previous response on this connection has not
    been read to completion (this class is strictly half-duplex).
    """
    # a prior response must be fully consumed before a new request
    if self.response:
        if not self.response.isclosed():
            ### implies half-duplex!
            raise error, 'prior response has not been fully handled'
        self.response = None
    if not url:
        url = '/'
    str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
    try:
        self.send(str)
    except socket.error, v:
        if v[0] != 32:  # anything but "Broken pipe": propagate
            raise
        # try one more time (the socket was closed; this will reopen)
        self.send(str)
    # required for HTTP/1.1, harmless for HTTP/1.0
    self.putheader('Host', self.host)
    if self._http_vsn == 11:
        # Issue some standard headers for better HTTP/1.1 compliance
        # note: we are assuming that clients will not attempt to set these
        # headers since *this* library must deal with the consequences.
        # this also means that when the supporting libraries are
        # updated to recognize other forms, then this code should be
        # changed (removed or updated).
        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        self.putheader('Accept-Encoding', 'identity')
        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')
        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')
    else:
        # For HTTP/1.0, the server will assume "not chunked"
        pass
def putheader(self, header, value):
    """Send one request header line to the server.

    For example: h.putheader('Accept', 'text/html')
    """
    self.send('%s: %s\r\n' % (header, value))
def endheaders(self):
    """Terminate the header section by sending the blank line."""
    self.send('\r\n')
def request(self, method, url='/', body=None, headers=None):
    """Send a complete request to the server.

    `method'  - HTTP method, e.g. 'GET' or 'POST'.
    `url'     - object being requested, e.g. '/index.html'.
    `body'    - optional entity body; a matching Content-Length
                header is generated automatically.
    `headers' - optional mapping of extra header names to values.
    """
    # BUG FIX: the default was the shared mutable dict `headers={}';
    # any accidental mutation would have leaked into later calls.
    if headers is None:
        headers = {}
    self.putrequest(method, url)
    if body:
        self.putheader('Content-Length', str(len(body)))
    for hdr, value in headers.items():
        self.putheader(hdr, value)
    self.endheaders()
    if body:
        self.send(body)
def getreply(self):
    """Get a reply from the server.

    Returns a tuple consisting of:
    - server response code (e.g. '200' if all goes well)
    - server response string corresponding to response code
    - any RFC822 headers in the response from the server

    On a malformed status line the connection is closed and
    (-1, raw_line, file) is returned, where `file' is the raw socket
    file object (old-httplib compatibility behavior).
    """
    file = self.sock.makefile('rb')
    line = file.readline()
    try:
        # normal form: "HTTP/1.x <code> <reason phrase>"
        [ver, code, msg] = string.split(line, None, 2)
    except ValueError:
        try:
            # some servers omit the reason phrase entirely
            [ver, code] = string.split(line, None, 1)
            msg = ""
        except ValueError:
            # not even "<ver> <code>": give up on parsing
            self.close()
            return -1, line, file
    if ver[:5] != 'HTTP/':
        self.close()
        return -1, line, file
    errcode = int(code)
    errmsg = string.strip(msg)
    response = self.response_class(file, ver, errcode)
    if response.will_close:
        # this effectively passes the connection to the response
        self.close()
    else:
        # remember this, so we can tell when it is complete
        self.response = response
    return errcode, errmsg, response
class HTTP(HTTPConnection):
    "Compatibility class with httplib.py from 1.5."

    # speak HTTP/1.0 like the old class did, even though the
    # superclass implements HTTP/1.1
    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    def __init__(self, host='', port=None):
        "Provide a default host, since the superclass requires one."
        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        HTTPConnection.__init__(self, host, port)

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."
        if host:
            self._set_hostport(host, port)
        HTTPConnection.connect(self)

    def set_debuglevel(self, debuglevel):
        "The class no longer supports the debuglevel."
        pass

    def getfile(self):
        "Provide a getfile, since the superclass' use of HTTP/1.1 prevents it."
        # self.file is only assigned by getreply() below
        return self.file

    def putheader(self, header, *values):
        "The superclass allows only one value argument."
        # multiple values are joined as RFC822 header continuation lines
        HTTPConnection.putheader(self, header, string.joinfields(values, '\r\n\t'))

    def getreply(self):
        "Compensate for an instance attribute shuffling."
        errcode, errmsg, response = HTTPConnection.getreply(self)
        if errcode == -1:
            self.file = response  # response is the "file" when errcode==-1
            self.headers = None
            return -1, errmsg, None
        self.headers = response
        self.file = response.fp
        return errcode, errmsg, response

87
lc.cgi Executable file
View file

@ -0,0 +1,87 @@
#!/usr/bin/env python
import re,cgi,sys,urlparse,time,os

# configuration
# make tracebacks visible in the browser output
sys.stderr = sys.stdout
cgi_dir = "/home/calvin/public_html/cgi-bin"
dist_dir = "/home/calvin/linkchecker-1.1.0"
# BUG FIX: this read `pylice_dir + "/pylice"', but no `pylice_dir'
# variable exists anywhere in this script, so every request died with
# a NameError.  The checker script ships in the distribution
# directory, so build the path from dist_dir.
# TODO(review): confirm the executable name after the pylice ->
# LinkChecker rename.
lc = dist_dir + "/pylice"
sys.path.insert(0, dist_dir)
cgi.logfile = cgi_dir + "/lc.log"
def testit():
    """Dump the standard CGI diagnostics page and quit."""
    cgi.test()
    sys.exit(0)
def checkform():
    """Validate the submitted CGI form; return 1 if acceptable, else 0."""
    # both mandatory fields must be present and non-empty
    for key in ["level", "url"]:
        if not form.has_key(key) or form[key].value == "":
            return 0
    # only plain http URLs built from a safe character set
    if not re.match(r"^http://[-\w./~]+$", form["url"].value):
        return 0
    # recursion level must be a digit and at most 3
    if not re.match(r"\d", form["level"].value):
        return 0
    if int(form["level"].value) > 3:
        return 0
    # checkbox fields may only carry the value "on"
    for key in ["anchors", "errors", "intern"]:
        if form.has_key(key) and form[key].value != "on":
            return 0
    return 1
def getHostName():
    """Return the host (netloc) component of the submitted URL."""
    parts = urlparse.urlparse(form["url"].value)
    return parts[1]
def logit():
    """Append a timestamped record of this request to the log file."""
    logfile = open("/home/calvin/log/linkchecker.log", "a")
    stamp = time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))
    logfile.write("\n" + stamp + "\n")
    # interesting request metadata, if the web server provides it
    for var in ["HTTP_USER_AGENT", "REMOTE_ADDR", "REMOTE_HOST", "REMOTE_PORT"]:
        if os.environ.has_key(var):
            logfile.write(var + "=" + os.environ[var] + "\n")
    # the submitted form fields
    for key in ["level", "url", "anchors", "errors", "intern"]:
        if form.has_key(key):
            logfile.write(str(form[key]) + "\n")
    logfile.close()
def printError():
    """Write the HTML error page for rejected requests to stdout."""
    print """<html><head></head>
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
alink="#191c83" >
<blockquote>
<b>Error</b><br>
The LinkChecker Online script has encountered an error. Please ensure
that your provided URL link begins with <code>http://</code> and
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
Errors are logged.
</blockquote>
</body>
</html>
"""
# main
print "Content-type: text/html"
print
#testit()
form = cgi.FieldStorage()
# reject (and log) anything that does not pass validation
if not checkform():
    logit()
    printError()
    sys.exit(0)
# build a fake argv for the checker script:
# argv[0] empty, -H: HTML output, -r: recursion depth, -s: strict
args = ["", "-H", "-r "+form["level"].value, "-s"]
if form.has_key("anchors"):
    args.append("-a")  # also check HTML anchors
if not form.has_key("errors"):
    args.append("-v")  # verbose unless "errors only" was requested
if form.has_key("intern"):
    # restrict checking to the host of the submitted URL
    args.append("--intern=^(ftp|http)://"+getHostName())
else:
    args.append("--extern=^file:")
    args.append("--intern=.+")
args.append(form["url"].value)
sys.argv = args
# run the checker in this same interpreter with the forged argv
execfile(lc)

307
linkcheck/Config.py Normal file
View file

@ -0,0 +1,307 @@
import ConfigParser,sys,os,re,UserDict
from os.path import expanduser,normpath,normcase,join,isfile
import Logging
# application identity constants
Version = "1.1.0"
AppName = "LinkChecker"
App = AppName + " " + Version
UserAgent = AppName + "/" + Version
Author = "Bastian Kleineidam"
Copyright = "Copyright © 2000 by " + Author
HtmlCopyright = "Copyright &copy; 2000 by " + Author
AppInfo = App + " " + Copyright
HtmlAppInfo = App + ", " + HtmlCopyright
Url = "http://pylice.sourceforge.net/"
Email = "calvin@users.sourceforge.net"
Freeware = AppName + """ comes with ABSOLUTELY NO WARRANTY!
This is free software, and you are welcome to redistribute it
under certain conditions. Look at the file `LICENSE' whithin this
distribution."""

# map of output format name -> logger factory
Loggers = {
    "text": Logging.StandardLogger,
    "html": Logging.HtmlLogger,
    "colored": Logging.ColoredLogger,
    "gml": Logging.GMLLogger,
    "sql": Logging.SQLLogger,
}
# human readable, comma separated list of the known format names
LoggerKeys = ", ".join(Loggers.keys())

DebugDelim = "==========================================================\n"
DebugFlag = 0
# note: debugging with more than 1 thread can be painful
def debug(msg):
    """Write msg to stderr immediately, but only when debugging is on."""
    if not DebugFlag:
        return
    sys.stderr.write(msg)
    sys.stderr.flush()
def _norm(path):
return normcase(normpath(expanduser(path)))
class Configuration(UserDict.UserDict):
def __init__(self):
UserDict.UserDict.__init__(self)
self.data["log"] = Loggers["text"]()
self.data["verbose"] = 0
self.data["warnings"] = 0
self.data["anchors"] = 0
self.data["externlinks"] = []
self.data["internlinks"] = []
self.data["allowdeny"] = 0
self.data["user"] = "anonymous"
self.data["password"] = "joe@"
self.data["proxy"] = 0
self.data["proxyport"] = 8080
self.data["recursionlevel"] = 1
self.data["robotstxt"] = 0
self.data["strict"] = 0
self.data["fileoutput"] = []
self.data["quiet"] = 0
self.urlCache = {}
self.robotsTxtCache = {}
try:
from threading import *
self.enableThreading(5)
except:
type, value = sys.exc_info()[:2]
self.disableThreading()
def disableThreading(self):
self.data["threads"] = 0
self.hasMoreUrls = self.hasMoreUrls_NoThreads
self.finished = self.finished_NoThreads
self.finish = self.finish_NoThreads
self.appendUrl = self.appendUrl_NoThreads
self.getUrl = self.getUrl_NoThreads
self.checkUrl = self.checkUrl_NoThreads
self.urlCache_has_key = self.urlCache_has_key_NoThreads
self.urlCache_get = self.urlCache_get_NoThreads
self.urlCache_set = self.urlCache_set_NoThreads
self.robotsTxtCache_has_key = self.robotsTxtCache_has_key_NoThreads
self.robotsTxtCache_get = self.robotsTxtCache_get_NoThreads
self.robotsTxtCache_set = self.robotsTxtCache_set_NoThreads
self.log_newUrl = self.log_newUrl_NoThreads
self.urls = []
self.threader = None
def enableThreading(self, num):
import Queue,Threader
from threading import Lock
self.data["threads"] = 1
self.hasMoreUrls = self.hasMoreUrls_Threads
self.finished = self.finished_Threads
self.finish = self.finish_Threads
self.appendUrl = self.appendUrl_Threads
self.getUrl = self.getUrl_Threads
self.checkUrl = self.checkUrl_Threads
self.urlCache_has_key = self.urlCache_has_key_Threads
self.urlCache_get = self.urlCache_get_Threads
self.urlCache_set = self.urlCache_set_Threads
self.urlCacheLock = Lock()
self.robotsTxtCache_has_key = self.robotsTxtCache_has_key_Threads
self.robotsTxtCache_get = self.robotsTxtCache_get_Threads
self.robotsTxtCache_set = self.robotsTxtCache_set_Threads
self.robotsTxtCacheLock = Lock()
self.log_newUrl = self.log_newUrl_Threads
self.logLock = Lock()
self.urls = Queue.Queue(0)
self.threader = Threader.Threader(num)
def hasMoreUrls_NoThreads(self):
return len(self.urls)
def finished_NoThreads(self):
return not self.hasMoreUrls_NoThreads()
def finish_NoThreads(self):
pass
def appendUrl_NoThreads(self, url):
self.urls.append(url)
def getUrl_NoThreads(self):
return self.urls.pop(0)
def checkUrl_NoThreads(self, url):
url.check(self)
def urlCache_has_key_NoThreads(self, key):
return self.urlCache.has_key(key)
def urlCache_get_NoThreads(self, key):
return self.urlCache[key]
def urlCache_set_NoThreads(self, key, val):
self.urlCache[key] = val
def robotsTxtCache_has_key_NoThreads(self, key):
return self.robotsTxtCache.has_key(key)
def robotsTxtCache_get_NoThreads(self, key):
return self.robotsTxtCache[key]
def robotsTxtCache_set_NoThreads(self, key, val):
self.robotsTxtCache[key] = val
def log_newUrl_NoThreads(self, url):
if not self.data["quiet"]: self.data["log"].newUrl(url)
for log in self.data["fileoutput"]:
log.newUrl(url)
def log_init(self):
if not self.data["quiet"]: self.data["log"].init()
for log in self.data["fileoutput"]:
log.init()
def log_endOfOutput(self):
if not self.data["quiet"]: self.data["log"].endOfOutput()
for log in self.data["fileoutput"]:
log.endOfOutput()
def hasMoreUrls_Threads(self):
return not self.urls.empty()
def finished_Threads(self):
self.threader.reduceThreads()
return not self.hasMoreUrls() and self.threader.finished()
def finish_Threads(self):
self.threader.finish()
def appendUrl_Threads(self, url):
self.urls.put(url)
def getUrl_Threads(self):
return self.urls.get()
def checkUrl_Threads(self, url):
self.threader.startThread(url.check, (self,))
def urlCache_has_key_Threads(self, key):
self.urlCacheLock.acquire()
ret = self.urlCache.has_key(key)
self.urlCacheLock.release()
return ret
def urlCache_get_Threads(self, key):
self.urlCacheLock.acquire()
ret = self.urlCache[key]
self.urlCacheLock.release()
return ret
def urlCache_set_Threads(self, key, val):
self.urlCacheLock.acquire()
self.urlCache[key] = val
self.urlCacheLock.release()
def robotsTxtCache_has_key_Threads(self, key):
self.robotsTxtCacheLock.acquire()
ret = self.robotsTxtCache.has_key(key)
self.robotsTxtCacheLock.release()
return ret
def robotsTxtCache_get_Threads(self, key):
self.robotsTxtCacheLock.acquire()
ret = self.robotsTxtCache[key]
self.robotsTxtCacheLock.release()
return ret
def robotsTxtCache_set_Threads(self, key, val):
self.robotsTxtCacheLock.acquire()
self.robotsTxtCache[key] = val
self.robotsTxtCacheLock.release()
def log_newUrl_Threads(self, url):
self.logLock.acquire()
if not self.data["quiet"]: self.data["log"].newUrl(url)
for log in self.data["fileoutput"]:
log.newUrl(url)
self.logLock.release()
def read(self, files = []):
files.insert(0,_norm("~/.pylicerc"))
if sys.platform=="win32":
if not sys.path[0]:
path=os.getcwd()
else:
path=sys.path[0]
else:
path="/etc"
files.insert(0,_norm(join(path, "pylicerc")))
if len(files):
self.readConfig(files)
def warn(self, msg):
self.message("Config: WARNING: "+msg)
def error(self, msg):
self.message("Config: ERROR: "+msg)
def message(self, msg):
sys.stderr.write(msg+"\n")
sys.stderr.flush()
def readConfig(self, files):
try:
cfgparser = ConfigParser.ConfigParser()
cfgparser.read(files)
except: return
section="output"
try:
log = cfgparser.get(section, "log")
if Loggers.has_key(log):
self.data["log"] = Loggers[log]()
else:
self.warn("invalid log option "+log)
except: pass
try:
if cfgparser.getboolean(section, "verbose"):
self.data["verbose"] = 1
self.data["warnings"] = 1
except: pass
try: self.data["quiet"] = cfgparser.getboolean(section, "quiet")
except: pass
try: self.data["warnings"] = cfgparser.getboolean(section, "warnings")
except: pass
section="checking"
try:
num = cfgparser.getint(section, "threads")
if num<=0:
self.disableThreads()
else:
self.enableThreads(num)
except: pass
try: self.data["anchors"] = cfgparser.getboolean(section, "anchors")
except: pass
try: self.data["externlinks"].append(re.compile(cfgparser.get(section, "externlinks")))
except: pass
try: self.data["internlinks"].append(re.compile(cfgparser.get(section, "internlinks")))
except: pass
try: self.data["allowdeny"] = cfgparser.getboolean(section, "allowdeny")
except: pass
try: self.data["password"] = cfgparser.get(section, "password")
except: pass
try: self.data["user"] = cfgparser.get(section, "user")
except: pass
try:
self.data["proxy"] = cfgparser.get(section, "proxy")
self.data["proxyport"] = cfgparser.getint(section, "proxyport")
except: pass
try:
num = cfgparser.getint(section, "recursionlevel")
if num<0:
self.error("illegal recursionlevel number: "+`num`)
self.data["recursionlevel"] = num
except: pass
try: self.data["robotstxt"] = cfgparser.getboolean(section, "robotstxt")
except: pass
try: self.data["strict"] = cfgparser.getboolean(section, "strict")
except: pass
try:
filelist = string.split(cfgparser.get(section, "fileoutput"))
for arg in filelist:
if Loggers.has_key(arg):
self.data["fileoutput"].append(Loggers[arg](open("pylice-out."+arg, "w")))
except:
pass

48
linkcheck/FileUrlData.py Normal file
View file

@ -0,0 +1,48 @@
import re,string,os,urlparse
from UrlData import UrlData
from os.path import normpath
class FileUrlData(UrlData):
    "Url link with file scheme"

    def __init__(self,
                 urlName,
                 recursionLevel,
                 parentName = None,
                 baseRef = None, line=0, _time=0):
        """Store the link data.

        A bare local file name given directly on the command line
        (no parent, no base, no file: prefix) is additionally turned
        into an absolute file:// URL.
        """
        UrlData.__init__(self,
                         urlName,
                         recursionLevel,
                         parentName,
                         baseRef, line, _time)
        if not parentName and not baseRef and \
           not re.compile("^file:").search(self.urlName):
            winre = re.compile("^[a-zA-Z]:")
            if winre.search(self.urlName):
                # looks like "c:..." - a Windows drive path
                self.adjustWindozePath()
            else:
                if self.urlName[0:1] != "/":
                    # relative name: anchor it at the current directory
                    self.urlName = os.getcwd()+"/"+self.urlName
                    # on Windows getcwd() begins with a drive letter, so
                    # the combined path may need adjusting as well
                    # NOTE(review): nesting reconstructed; original
                    # indentation was lost in this revision.
                    if winre.search(self.urlName):
                        self.adjustWindozePath()
            self.urlName = "file://"+normpath(self.urlName)

    def buildUrl(self):
        """Build the URL from its parts."""
        UrlData.buildUrl(self)
        # cut off parameter, query and fragment
        self.url = urlparse.urlunparse(self.urlTuple[:3] + ('','',''))

    def adjustWindozePath(self):
        "c:\\windows ==> /c|\\windows"
        self.urlName = "/"+self.urlName[0]+"|"+self.urlName[2:]

    def isHtml(self):
        # treat .html/.htm/.shtml/.shtm suffixes as HTML content
        return self.valid and re.compile("\.s?html?$").search(self.url)

    def __str__(self):
        return "File link\n"+UrlData.__str__(self)

26
linkcheck/FtpUrlData.py Normal file
View file

@ -0,0 +1,26 @@
import ftplib
from UrlData import UrlData
class FtpUrlData(UrlData):
"""
Url link with ftp scheme.
"""
def checkConnection(self, config):
self.urlConnection = ftplib.FTP(self.urlTuple[1],
config["user"], config["password"])
info = self.urlConnection.getwelcome()
if not info:
self.closeConnection()
raise Exception, "Got no answer from FTP server"
self.setInfo(info)
def closeConnection(self):
try: self.urlConnection.quit()
except: pass
self.urlConnection = None
def __str__(self):
return "FTP link\n"+UrlData.__str__(self)

View file

@ -0,0 +1,9 @@
from UrlData import UrlData
class GopherUrlData(UrlData):
    """Url link with gopher scheme."""

    def __str__(self):
        # prefix the generic description with the scheme name
        return "Gopher link\n" + UrlData.__str__(self)

View file

@ -0,0 +1,33 @@
import socket,string
from UrlData import UrlData
class HostCheckingUrlData(UrlData):
    "Url link for which we have to connect to a specific host"

    def __init__(self, urlName, recursionLevel, parentName=None,
                 baseRef=None, line=0, _time=0):
        UrlData.__init__(self, urlName, recursionLevel, parentName, baseRef,
                         line, _time)
        # subclasses fill in the host they have to reach
        self.host = None
        self.url = urlName

    def buildUrl(self):
        # no URL tuple; this also avoids anchor checking
        self.urlTuple = None

    def getCacheKey(self):
        """Cache results per host, not per full URL."""
        return self.host

    def checkConnection(self, config):
        """Resolve the host name; an unknown host raises an error."""
        ip = socket.gethostbyname(self.host)
        self.setValid(self.host + "(" + ip + ") found")

    def closeConnection(self):
        UrlData.closeConnection(self)

    def __str__(self):
        return "host=" + repr(self.host) + "\n" + UrlData.__str__(self)

150
linkcheck/HttpUrlData.py Normal file
View file

@ -0,0 +1,150 @@
import httplib,urlparse,sys,base64,time
from UrlData import UrlData
from RobotsTxt import RobotsTxt
import Config,StringUtil
class HttpUrlData(UrlData):
    "Url link with http scheme"

    def checkConnection(self, config):
        """
        Check a URL with HTTP protocol.
        Here is an excerpt from RFC 1945 with common response codes:
        The first digit of the Status-Code defines the class of response. The
        last two digits do not have any categorization role. There are 5
        values for the first digit:
        o 1xx: Informational - Not used, but reserved for future use
        o 2xx: Success - The action was successfully received,
          understood, and accepted.
        o 3xx: Redirection - Further action must be taken in order to
          complete the request
        o 4xx: Client Error - The request contains bad syntax or cannot
          be fulfilled
        o 5xx: Server Error - The server failed to fulfill an apparently
          valid request
        Status-Code = "200" ; OK | "201" ; Created | "202" ; Accepted
          | "204" ; No Content | "301" ; Moved Permanently
          | "302" ; Moved Temporarily | "304" ; Not Modified
          | "400" ; Bad Request | "401" ; Unauthorized | "403" ; Forbidden
          | "404" ; Not Found | "500" ; Internal Server Error
          | "501" ; Not Implemented | "502" ; Bad Gateway
          | "503" ; Service Unavailable | extension-code
        """
        self.mime = None
        self.auth = None
        self.proxy = config["proxy"]
        self.proxyport = config["proxyport"]
        if config["robotstxt"] and not self.robotsTxtAllowsUrl(config):
            self.setWarning("Access denied by robots.txt, checked only syntax")
            return
        status, statusText, self.mime = self.getHttpRequest()
        Config.debug(str(self.mime))
        if status == 401:
            # BUG FIX: the original referenced LinkChecker.User and
            # LinkChecker.Password, names that do not exist in this
            # module (NameError on every 401).  The credentials live in
            # the configuration.  encodestring() appends a trailing
            # newline which must not end up in the header value, so
            # cut it off.
            self.auth = base64.encodestring(
                config["user"]+":"+config["password"])[:-1]
            status, statusText, self.mime = self.getHttpRequest()
        if status >= 400:
            self.setError(str(status)+" "+statusText)
            return
        # follow redirections and set self.url to the effective url
        tries = 0
        redirected = self.urlName
        while status in [301,302] and self.mime and tries < 5:
            redirected = urlparse.urljoin(redirected, self.mime.getheader("Location"))
            self.urlTuple = urlparse.urlparse(redirected)
            status, statusText, self.mime = self.getHttpRequest()
            Config.debug("\nRedirected\n"+str(self.mime))
            tries = tries + 1
        effectiveurl = urlparse.urlunparse(self.urlTuple)
        if self.url != effectiveurl:
            self.setWarning("Effective URL "+effectiveurl)
            self.url = effectiveurl
        # check final result
        if status == 204:
            # "No Content" is valid, but worth a warning
            self.setWarning(statusText)
        if status >= 400:
            self.setError(str(status)+" "+statusText)
        else:
            self.setValid(str(status)+" "+statusText)

    def getHttpRequest(self, method="HEAD"):
        "Put request and return (status code, status text, mime object)"
        if self.proxy:
            host = self.proxy+":"+str(self.proxyport)
        else:
            host = self.urlTuple[1]
        if self.urlConnection:
            self.closeConnection()
        self.urlConnection = httplib.HTTP(host)
        # a proxy wants the full URL, a plain server just the path
        if self.proxy:
            path = urlparse.urlunparse(self.urlTuple)
        else:
            path = self.urlTuple[2]
            if self.urlTuple[3] != "":
                path = path + ";" + self.urlTuple[3]
            if self.urlTuple[4] != "":
                path = path + "?" + self.urlTuple[4]
        self.urlConnection.putrequest(method, path)
        if self.auth:
            self.urlConnection.putheader("Authorization", "Basic "+self.auth)
        self.urlConnection.putheader("User-agent", Config.UserAgent)
        self.urlConnection.endheaders()
        return self.urlConnection.getreply()

    def getContent(self):
        """Download the page body with a GET request; return it with
        HTML comments stripped, and record the download time."""
        self.closeConnection()
        t = time.time()
        self.getHttpRequest("GET")
        # from now on urlConnection holds the response file object
        self.urlConnection = self.urlConnection.getfile()
        data = StringUtil.stripHtmlComments(self.urlConnection.read())
        self.time = time.time() - t
        return data

    def isHtml(self):
        """A valid answer with Content-Type text/html counts as HTML."""
        if self.mime:
            return self.valid and self.mime.gettype()=="text/html"
        return 0

    def robotsTxtAllowsUrl(self, config):
        """Return 1 if the host's robots.txt permits checking this URL.
        Any error while fetching/parsing robots.txt counts as
        permission (deliberate best effort)."""
        try:
            if config.robotsTxtCache_has_key(self.urlTuple[1]):
                robotsTxt = config.robotsTxtCache_get(self.urlTuple[1])
            else:
                robotsTxt = RobotsTxt(self.urlTuple[1], Config.UserAgent)
                Config.debug("DEBUG: "+str(robotsTxt)+"\n")
                config.robotsTxtCache_set(self.urlTuple[1], robotsTxt)
        except:
            exc_value = sys.exc_info()[1]
            Config.debug("Heieiei: "+str(exc_value)+"\n")
            return 1
        return robotsTxt.allowance(Config.UserAgent, self.urlTuple[2])

    def __str__(self):
        return "HTTP link\n"+UrlData.__str__(self)

    def closeConnection(self):
        # also release the mime headers object of the last response
        if self.mime:
            try: self.mime.close()
            except: pass
            self.mime = None
        UrlData.closeConnection(self)

13
linkcheck/HttpsUrlData.py Normal file
View file

@ -0,0 +1,13 @@
from UrlData import UrlData
class HttpsUrlData(UrlData):
    """Url link with https scheme."""

    def check(self, config):
        # no SSL support available: just record a warning and log it
        self.setWarning("Https url ignored")
        self.logMe(config)

    def __str__(self):
        return "HTTPS link\n" + UrlData.__str__(self)

View file

@ -0,0 +1,13 @@
from UrlData import UrlData
class JavascriptUrlData(UrlData):
    """Url link with javascript scheme."""

    def check(self, config):
        # javascript: links cannot be checked: warn and log only
        self.setWarning("Javascript url ignored")
        self.logMe(config)

    def __str__(self):
        return "Javascript link\n" + UrlData.__str__(self)

360
linkcheck/Logging.py Normal file
View file

@ -0,0 +1,360 @@
import sys,time,Config,StringUtil
# ANSI color codes
ESC = "\x1b"
COL_PARENT  = ESC + "[37m"    # white
COL_URL     = ESC + "[35m"    # magenta
COL_REAL    = ESC + "[35m"    # magenta
COL_BASE    = ESC + "[36m"    # cyan
COL_VALID   = ESC + "[1;32m"  # green
COL_INVALID = ESC + "[1;31m"  # red
COL_INFO    = ESC + "[0;37m"  # standard
COL_WARNING = ESC + "[1;33m"  # yellow
COL_DLTIME  = ESC + "[0;37m"  # standard
COL_RESET   = ESC + "[0m"     # reset to standard

# HTML colors and table fragments
ColorBackground = '"#fff7e5"'
ColorUrl        = '"#dcd5cf"'
ColorBorder     = '"#000000"'
ColorLink       = '"#191c83"'
TableWarning    = '<td bgcolor="#e0954e">'
TableError      = '<td bgcolor="db4930">'
TableOK         = '<td bgcolor="3ba557">'
RowEnd          = '</td></tr>\n'
MyFont          = '<font face="Lucida,Verdana,Arial,sans-serif,Helvetica">'
# return current time
def _currentTime():
return time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))
class StandardLogger:
    """Standard text logger.

    Informal text output format spec: the output is a set of URL logs
    separated by one or more blank lines.  A URL log consists of two
    or more lines, each a keyword plus data separated by whitespace.
    Keywords: Real URL (necessary), Result (necessary), Base,
    Parent URL, Info, Warning, D/L Time.  Unknown keywords are to be
    ignored by readers.
    """

    def __init__(self, fd=sys.stdout):
        self.errors = 0
        self.warnings = 0
        self.fd = fd
        # only close streams we own, never stdout
        self.willclose = 0
        if fd != sys.stdout:
            self.willclose = 1

    def init(self):
        """Write the banner: version, homepage, contact and start time."""
        banner = (Config.AppName+"\n"+
                  Config.Freeware+"\n"+
                  "Get the newest version at "+Config.Url+"\n"+
                  "Write comments and bugs to "+Config.Email+"\n\n"+
                  "Start checking at "+_currentTime()+"\n")
        self.fd.write(banner)
        self.fd.flush()

    def newUrl(self, urldata):
        """Write one complete log record for the given UrlData object."""
        fd = self.fd
        fd.write("\nURL "+urldata.urlName)
        fd.write(urldata.cached and " (cached)\n" or "\n")
        if urldata.parentName:
            fd.write("Parent URL "+urldata.parentName+", line "+str(urldata.line)+"\n")
        if urldata.baseRef:
            fd.write("Base "+urldata.baseRef+"\n")
        if urldata.url:
            fd.write("Real URL "+urldata.url+"\n")
        if urldata.time:
            fd.write("D/L Time %.3f seconds\n" % urldata.time)
        if urldata.infoString:
            fd.write("Info "+StringUtil.indent(
                StringUtil.blocktext(urldata.infoString, 65), 11)+"\n")
        if urldata.warningString:
            self.warnings = self.warnings+1
            fd.write("Warning "+urldata.warningString+"\n")
        fd.write("Result ")
        if urldata.valid:
            fd.write(urldata.validString+"\n")
        else:
            self.errors = self.errors+1
            fd.write(urldata.errorString+"\n")
        fd.flush()

    def endOfOutput(self):
        """Write the warning/error summary and close the stream."""
        if self.warnings == 1:
            warnstr = "1 warning, "
        else:
            warnstr = str(self.warnings)+" warnings, "
        if self.errors == 1:
            errstr = "1 error"
        else:
            errstr = str(self.errors)+" errors"
        self.fd.write("\nThats it. "+warnstr+errstr+" found.\n")
        self.fd.write("Stopped checking at "+_currentTime()+"\n")
        self.fd.flush()
        self.close()

    def close(self):
        """Close the output stream unless it is stdout."""
        if self.willclose:
            self.fd.close()
class HtmlLogger(StandardLogger):
    """Logger with HTML output.

    Writes one bordered table per checked URL; warnings and results
    are color coded through the Table* module constants.
    """

    def init(self):
        # page header: application name, license blurb, start time
        self.fd.write("<html><head><title>"+Config.AppName+"</title></head>"+\
            "<body bgcolor="+ColorBackground+" link="+ColorLink+\
            " vlink="+ColorLink+" alink="+ColorLink+">"+\
            "<center><h2>"+MyFont+Config.AppName+"</font>"+\
            "</center></h2>"+\
            "<br><blockquote>"+Config.Freeware+"<br><br>"+\
            "Start checking at "+_currentTime()+"<br><br>")
        self.fd.flush()

    def newUrl(self, urlData):
        # the outer one-cell table simulates a border around the inner table
        self.fd.write("<table align=left border=\"0\" cellspacing=\"0\""+\
            " cellpadding=\"1\" bgcolor="+ColorBorder+">"+\
            "<tr><td><table align=left border=\"0\" cellspacing=\"0\""+\
            " cellpadding=\"3\" bgcolor="+ColorBackground+">"+\
            "<tr><td bgcolor="+ColorUrl+">"+\
            MyFont+"URL</font></td><td bgcolor="+ColorUrl+">"+MyFont+\
            StringUtil.htmlify(urlData.urlName))
        if urlData.cached:
            self.fd.write("(cached)")
        self.fd.write("</font>"+RowEnd)
        if urlData.parentName:
            self.fd.write("<tr><td>"+MyFont+"Parent URL</font></td><td>"+\
                MyFont+"<a href=\""+urlData.parentName+"\">"+\
                urlData.parentName+"</a> line "+str(urlData.line)+\
                "</font>"+RowEnd)
        if urlData.baseRef:
            self.fd.write("<tr><td>"+MyFont+"Base</font></td><td>"+MyFont+\
                urlData.baseRef+"</font>"+RowEnd)
        if urlData.url:
            self.fd.write("<tr><td>"+MyFont+"Real URL</font></td><td>"+MyFont+\
                "<a href=\""+StringUtil.htmlify(urlData.url)+"\">"+\
                urlData.url+"</a></font>"+RowEnd)
        if urlData.time:
            self.fd.write("<tr><td>"+MyFont+"D/L Time</font></td><td>"+MyFont+\
                ("%.3f" % urlData.time)+" seconds</font>"+RowEnd)
        if urlData.infoString:
            self.fd.write("<tr><td>"+MyFont+"Info</font></td><td>"+MyFont+\
                StringUtil.htmlify(urlData.infoString)+"</font>"+RowEnd)
        if urlData.warningString:
            self.warnings = self.warnings+1
            self.fd.write("<tr>"+TableWarning+MyFont+"Warning</font></td>"+\
                TableWarning+MyFont+urlData.warningString+\
                "</font>"+RowEnd)
        if urlData.valid:
            self.fd.write("<tr>"+TableOK+MyFont+"Result</font></td>"+\
                TableOK+MyFont+urlData.validString+"</font>"+RowEnd)
        else:
            self.errors = self.errors+1
            self.fd.write("<tr>"+TableError+MyFont+"Result</font></td>"+\
                TableError+MyFont+urlData.errorString+"</font>"+RowEnd)
        self.fd.write("</table></td></tr></table><br clear=all><br>")
        self.fd.flush()

    def endOfOutput(self):
        # summary counters, stop time and the page footer
        self.fd.write(MyFont+"Thats it. ")
        if self.warnings==1:
            self.fd.write("1 warning, ")
        else:
            self.fd.write(str(self.warnings)+" warnings, ")
        if self.errors==1:
            self.fd.write("1 error")
        else:
            self.fd.write(str(self.errors)+" errors")
        self.fd.write(" found.<br>")
        # NOTE(review): there is a missing space after "at" in the
        # output text below (behavior preserved in this doc pass)
        self.fd.write("Stopped checking at"+_currentTime()+\
            "</font></blockquote><br><hr noshade size=1><small>"+\
            MyFont+Config.HtmlAppInfo+"<br>Get the newest version at "+\
            "<a href=\""+Config.Url+"\">"+Config.Url+"</a>.<br>"+\
            "Write comments and bugs to <a href=\"mailto:"+\
            Config.Email+"\">"+Config.Email+"</a>."+\
            "</font></small></body></html>")
        self.fd.flush()
        self.close()
class ColoredLogger(StandardLogger):
    """ANSI colorized output.

    Groups URLs under their parent page and draws a small ASCII tree
    ("|", "+-", "o") in front of the log lines.
    """

    def __init__(self, fd=sys.stdout):
        StandardLogger.__init__(self, fd)
        # parent URL of the group currently being printed
        self.currentPage = None
        # true while inside a tree group (controls the "|" prefixes)
        self.prefix = 0

    def newUrl(self, urlData):
        if urlData.parentName:
            if self.currentPage != urlData.parentName:
                # a new parent page: terminate the previous group ...
                if self.prefix:
                    self.fd.write("o\n")
                # ... and open a new one
                self.fd.write("\nParent URL "+COL_PARENT+urlData.parentName+\
                    COL_RESET+"\n")
                self.prefix = 1
                self.currentPage = urlData.parentName
        else:
            # URL without a parent: no tree decoration
            self.prefix = 0
        if self.prefix:
            self.fd.write("|\n+- ")
        else:
            self.fd.write("\n")
        self.fd.write("URL "+COL_URL+urlData.urlName+COL_RESET)
        if urlData.line: self.fd.write(" (line "+`urlData.line`+")")
        if urlData.cached:
            self.fd.write("(cached)\n")
        else:
            self.fd.write("\n")
        if urlData.baseRef:
            if self.prefix:
                self.fd.write("| ")
            self.fd.write("Base "+COL_BASE+urlData.baseRef+COL_RESET+"\n")
        if urlData.url:
            if self.prefix:
                self.fd.write("| ")
            self.fd.write("Real URL "+COL_REAL+urlData.url+COL_RESET+"\n")
        if urlData.time:
            if self.prefix:
                self.fd.write("| ")
            self.fd.write("D/L Time "+COL_DLTIME+("%.3f" % urlData.time)+" seconds"+\
                COL_RESET+"\n")
        if urlData.infoString:
            # multi-line info blocks get the tree bar on every line
            if self.prefix:
                self.fd.write("| Info "+\
                    StringUtil.indentWith(StringUtil.blocktext(\
                    urlData.infoString, 65), "| "))
            else:
                self.fd.write("Info "+\
                    StringUtil.indentWith(StringUtil.blocktext(\
                    urlData.infoString, 65), " "))
            self.fd.write(COL_RESET+"\n")
        if urlData.warningString:
            self.warnings = self.warnings+1
            if self.prefix:
                self.fd.write("| ")
            self.fd.write("Warning "+COL_WARNING+urlData.warningString+\
                COL_RESET+"\n")
        if self.prefix:
            self.fd.write("| ")
        self.fd.write("Result ")
        if urlData.valid:
            self.fd.write(COL_VALID+urlData.validString+COL_RESET+"\n")
        else:
            self.errors = self.errors+1
            self.fd.write(COL_INVALID+urlData.errorString+COL_RESET+"\n")
        self.fd.flush()

    def endOfOutput(self):
        # terminate a dangling tree group before the summary
        if self.prefix:
            self.fd.write("o\n")
        StandardLogger.endOfOutput(self)
class GMLLogger(StandardLogger):
    """GML (Graph Modelling Language) sitemap output.

    Records are collected in newUrl() and written as one node/edge
    graph at endOfOutput().
    """

    def __init__(self, fd=sys.stdout):
        StandardLogger.__init__(self, fd)
        # all UrlData records seen so far
        self.nodes = []

    def init(self):
        # graph header with creator/contact comments
        self.fd.write("graph [\n Creator \""+Config.AppName+\
            "\"\n comment \"you get pylice at "+Config.Url+\
            "\"\n comment \"write comments and bugs to "+Config.Email+\
            "\"\n directed 1\n")
        self.fd.flush()

    def newUrl(self, urlData):
        # just remember the record; all output happens in endOfOutput()
        self.nodes.append(urlData)

    def endOfOutput(self):
        # maps url -> node id of the nodes already written
        writtenNodes = {}
        # write nodes
        nodeid = 1
        for node in self.nodes:
            if node.url and not writtenNodes.has_key(node.url):
                self.fd.write(" node [\n id "+`nodeid`+"\n label \""+
                    node.url+"\"\n ]\n")
                writtenNodes[node.url] = nodeid
                nodeid = nodeid + 1
        # write edges
        # NOTE(review): assumes every parentName also appears as a node
        # url; a parent missing from writtenNodes raises KeyError here.
        for node in self.nodes:
            if node.url and node.parentName:
                self.fd.write(" edge [\n label \""+node.urlName+\
                    "\"\n source "+`writtenNodes[node.parentName]`+\
                    "\n target "+`writtenNodes[node.url]`+\
                    "\n ]\n")
        # end of output
        self.fd.write("]\n")
        self.fd.flush()
        self.close()
class SQLLogger(StandardLogger):
""" SQL output, only tested with PostgreSQL"""
def init(self):
self.fd.write("-- created by "+Config.AppName+" at "+_currentTime()+\
"\n-- you get pylice at "+Config.Url+\
"\n-- write comments and bugs to "+Config.Email+"\n\n")
self.fd.flush()
def newUrl(self, urlData):
self.fd.write("insert into pylicedb(urlname,"+\
"recursionlevel,"+\
"parentname,"+\
"baseref,"+\
"errorstring,"+\
"validstring,"+\
"warningstring,"+\
"infoString,"+\
"valid,"+\
"url,"+\
"line,"+\
"cached) values ")
self.fd.write("'"+urlData.urlName+"',"+\
`urlData.recursionLevel`+","+\
StringUtil.sqlify(urlData.parentName)+","+\
StringUtil.sqlify(urlData.baseRef)+","+\
StringUtil.sqlify(urlData.errorString)+","+\
StringUtil.sqlify(urlData.validString)+","+\
StringUtil.sqlify(urlData.warningString)+","+\
StringUtil.sqlify(urlData.infoString)+","+\
`urlData.valid`+","+\
StringUtil.sqlify(urlData.url)+","+\
`urlData.line`+","+\
`urlData.cached`+");\n")
self.fd.flush()
def endOfOutput(self):
self.close()

View file

@ -0,0 +1,67 @@
import re,socket,string,DNS,sys
from HostCheckingUrlData import HostCheckingUrlData
from smtplib import SMTP
class MailtoUrlData(HostCheckingUrlData):
"Url link with mailto scheme"
def buildUrl(self):
HostCheckingUrlData.buildUrl(self)
if not re.compile("^mailto:([\-\w.]+@[\-\w.?=]+|[\w\s]+<[\-\w.]+@[\-\w.?=]+>)").match(self.urlName):
raise Exception, "Illegal mailto link syntax"
self.host = self.urlName[7:]
i = string.find(self.host, "<")
j = string.find(self.host, ">")
if i!=-1 and j!=-1 and i<j:
self.host = self.host[i+1:j]
i = string.find(self.host, "@")
self.user = self.host[:i]
self.host = self.host[(i+1):]
i = string.find(self.host, "?")
if i!=-1:
self.host = self.host[:i]
self.host = string.lower(self.host)
# do not lower the user name
def checkConnection(self, config):
DNS.ParseResolvConf()
mxrecords = DNS.mxlookup(self.host)
if not len(mxrecords):
self.setError("No mail host for "+self.host+" found")
return
smtpconnect = 0
for mxrecord in mxrecords:
try:
self.urlConnection = SMTP(mxrecord[1])
smtpconnect = 1
self.urlConnection.helo()
info = self.urlConnection.verify(self.user)
if info[0]==250:
self.setInfo("Verified adress: "+info[1])
except:
type, value = sys.exc_info()[:2]
#print value
if smtpconnect: break
if not smtpconnect:
self.setWarning("None of the mail hosts for "+self.host+" accepts an SMTP connection")
mxrecord = mxrecords[0][1]
else:
mxrecord = mxrecord[1]
self.setValid("found mail host "+mxrecord)
def closeConnection(self):
try: self.urlConnection.quit()
except: pass
self.urlConnection = None
def getCacheKey(self):
return "mailto:"+self.user+"@"+HostCheckingUrlData.getCacheKey(self)
def __str__(self):
return "Mailto link\n"+HostCheckingUrlData.__str__(self)

76
linkcheck/OutputReader.py Normal file
View file

@ -0,0 +1,76 @@
import string,re
import UrlData
class ParseException(Exception):
    """Raised when checker output cannot be parsed back into url data."""
class OutputReader:
    """Parse the standard text output of the link checker back into a
    list of UrlData objects (see parsetest.py for a usage example)."""
    # patterns matching the keyword lines of the text logger
    ws = re.compile("\s+")
    regex_realUrl = re.compile("^Real URL.+")
    regex_result = re.compile("^Result.+")
    regex_base = re.compile("^Base.+")
    regex_info = re.compile("^Info.+")
    regex_warning = re.compile("^Warning.+")
    regex_parentUrl = re.compile("^Parent URL.+")
    regex_valid = re.compile("^Valid.*")
    def resetState(self):
        """Forget all data of the URL entry currently being parsed."""
        self.urlName = None
        self.parentName = None
        self.baseRef = None
        self.info = None
        self.warning = None
        self.result = None
        self.linenumber = 0
        # state counts the mandatory keywords seen (Real URL, Result)
        self.state = 0
    def parse(self, file):
        """Read the file object and return a list of UrlData objects,
        one per logged URL entry. Entries are separated by blank lines
        and must contain both a "Real URL" and a "Result" line;
        otherwise a ParseException is raised."""
        line = file.readline()
        url = None
        urls = []
        self.resetState()
        while line:
            if OutputReader.ws.match(line):
                # a blank line finishes the current entry
                if self.state>=2:
                    #append url
                    urldata = UrlData.GetUrlDataFrom(self.urlName, 0,
                        self.parentName, self.baseRef, self.linenumber)
                    if self.info:
                        urldata.setInfo(self.info)
                    if self.warning:
                        # this used to pass self.info by mistake, which
                        # dropped the parsed warning text
                        urldata.setWarning(self.warning)
                    if OutputReader.regex_valid.match(self.result):
                        urldata.valid=1
                        urldata.validString = self.result
                    else:
                        urldata.valid=0
                        urldata.errorString = self.result
                    urls.append(urldata)
                elif self.state:
                    raise ParseException("No Real URL and Result keyword found")
                self.resetState()
            elif OutputReader.regex_realUrl.match(line):
                self.state = self.state+1
                self.urlName = string.strip(line[8:])
            elif OutputReader.regex_result.match(line):
                self.state = self.state+1
                self.result = string.strip(line[6:])
            elif OutputReader.regex_info.match(line):
                self.info = string.strip(line[4:])
            elif OutputReader.regex_base.match(line):
                self.baseRef = string.strip(line[4:])
            elif OutputReader.regex_warning.match(line):
                self.warning = string.strip(line[7:])
            elif OutputReader.regex_parentUrl.match(line):
                self.parentName = string.strip(line[10:])
                # NOTE(review): linenumber stays a string after this
                # split -- confirm UrlData accepts that
                if ',' in self.parentName:
                    self.parentName,self.linenumber = string.split(self.parentName,",",1)
            else:
                pass
            line = file.readline()
        return urls

156
linkcheck/RobotsTxt.py Normal file
View file

@ -0,0 +1,156 @@
import re,urlparse,string,httplib,urllib,sys,StringUtil,Config
class RobotsTxt:
def __init__(self, base, useragent):
self.entries = []
self.disallowAll = 0
self.allowAll = 0
self.base = base
try:
urlConnection = httplib.HTTP(base)
urlConnection.putrequest("GET", "/robots.txt")
urlConnection.putheader("User-agent", useragent)
urlConnection.endheaders()
status = urlConnection.getreply()[0]
if status==401 or status==403:
self.disallowAll = 1
else:
if status>=400:
self.allowAll = 1
if status<400:
self.parseUrl(urlConnection)
except:
type, value = sys.exc_info()[:2]
Config.debug("Hoppla. "+str(value))
self.allowAll = 1
def parseUrl(self, urlConnection):
data = urlConnection.getfile().readlines()
state = 0
linenumber = 0
entry = Entry()
for line in data:
line = string.lower(string.strip(line))
linenumber = linenumber + 1
if len(line)<=0:
if state==1:
raise ParseException, \
"robots.txt:"+`linenumber`+": no rules found"
elif state==2:
self.entries.append(entry)
entry = Entry()
state = 0
line = string.strip(StringUtil.stripFenceComments(line))
if len(line)<=0:
continue
if re.compile("^user-agent:.+").match(line):
if state==2:
raise ParseException, \
"robots.txt:"+`linenumber`+": user-agent in the middle of rules"
entry.useragents.append(string.strip(line[11:]))
state = 1
elif re.compile("^disallow:.+").match(line):
if state==0:
raise ParseException, \
"robots.txt:"+`linenumber`+": disallow without user agents"
line = string.strip(line[9:])
entry.rulelines.append(RuleLine(line, 0))
state = 2
elif re.compile("^allow:.+").match(line):
if state==0:
raise ParseException, \
"robots.txt:"+`linenumber`+": allow without user agents"
line = string.strip(line[6:])
entry.rulelines.append(RuleLine(line, 1))
else:
# ignore extensions
pass
def allowance(self, useragent, path):
Config.debug("DEBUG: checking allowance\n")
if self.disallowAll:
return 0
if self.allowAll:
return 1
# search for given user agent matches
# the first match counts
useragent = string.lower(useragent)
for entry in self.entries:
if entry.appliesToAgent(useragent):
return entry.allowance(path)
# agent not found ==> access granted
Config.debug("DEBUG: no match, access granted\n")
return 1
def __str__(self):
ret = "RobotsTxt\n"+\
"Base: "+self.base+"\n"+\
"AllowAll: "+`self.allowAll`+"\n"+\
"DisallowAll: "+`self.disallowAll`+"\n"
for entry in self.entries:
ret = ret + str(entry) + "\n"
return ret
class RuleLine:
    """One Allow/Disallow rule of a robots.txt entry."""
    def __init__(self, path, allowance):
        # the rule path is stored URL-decoded
        self.path = urllib.unquote(path)
        self.allowance = allowance
    def appliesTo(self, filename):
        """Return a true value if this rule matches the file name."""
        if self.path=="*":
            return 1
        # NOTE(review): the path is applied as an anchored regular
        # expression rather than a literal prefix -- confirm intended
        return re.compile(self.path).match(filename)
    def __str__(self):
        if not self.allowance:
            return "Disallow: "+self.path
        return "Allow: "+self.path
class Entry:
    """One robots.txt record: user agent names plus their rule lines."""
    def __init__(self):
        self.useragents = []
        self.rulelines = []
    def __str__(self):
        text = ""
        for agent in self.useragents:
            text = text + "User-agent: " + agent + "\n"
        for rule in self.rulelines:
            text = text + str(rule) + "\n"
        return text
    def appliesToAgent(self, agent):
        """Check if this entry applies to the specified agent name."""
        for known in self.useragents:
            # "*" covers every agent; otherwise match the known name
            # as a prefix of the given agent
            if known=="*" or re.compile("^"+known).match(agent):
                return 1
        return 0
    def allowance(self, filename):
        """Return the allowance of the first rule matching filename.

        Preconditions:
        - our agent applies to this entry
        - filename is URL decoded
        No matching rule means access is granted."""
        for rule in self.rulelines:
            if rule.appliesTo(filename):
                return rule.allowance
        return 1

View file

@ -0,0 +1,26 @@
import telnetlib,re
from HostCheckingUrlData import HostCheckingUrlData
class TelnetUrlData(HostCheckingUrlData):
"Url link with telnet scheme"
def buildUrl(self):
HostCheckingUrlData.buildUrl(self)
if not re.compile("^telnet:[\w.\-]+").match(self.urlName):
raise Exception, "Illegal telnet link syntax"
self.host = string.lower(self.urlName[7:])
def checkConnection(self, config):
HostCheckingUrlData.checkConnection(self, config)
self.urlConnection = telnetlib.Telnet()
self.urlConnection.open(self.host, 23)
def getCacheKey(self):
return "telnet:"+HostCheckingUrlData.getCacheKey(self)
def __str__(self):
return "Telnet link\n"+HostCheckingUrlData.__str__(self)

35
linkcheck/Threader.py Normal file
View file

@ -0,0 +1,35 @@
from threading import *
class Threader:
    """A thread generating class: start worker threads while keeping
    no more than maxThreads of them alive at the same time."""
    def __init__(self, num=5):
        # maximum number of concurrently running threads
        self.maxThreads = num
        # the currently tracked Thread objects
        self.threads = []
    def acquire(self):
        "Wait until we are allowed to start a new thread"
        # busy-waits, pruning finished threads until a slot is free
        while 1:
            self.reduceThreads()
            if len(self.threads) < self.maxThreads:
                break
    def reduceThreads(self):
        """Remove all finished threads from the internal list."""
        # iterate over a copy: removing items from a list while
        # iterating over it skips elements, so dead threads could stay
        # in the list and make acquire() wait forever
        for t in self.threads[:]:
            if not t.isAlive():
                self.threads.remove(t)
    def finished(self):
        """Return true if no threads are running any more."""
        return not len(self.threads)
    def finish(self):
        self.reduceThreads()
        for t in self.threads:
            pass # dont know how to stop a thread
    def startThread(self, callable, args):
        "Generate a new thread running callable(*args)"
        self.acquire()
        t = Thread(None, callable, None, args)
        t.start()
        self.threads.append(t)

294
linkcheck/UrlData.py Normal file
View file

@ -0,0 +1,294 @@
import sys,re,string,urlparse,urllib,time
import Config,StringUtil
# (tag, attribute) pairs whose attribute values are treated as links
# when parsing HTML documents (see UrlData.parseUrl)
LinkTags = [("a", "href"),
            ("img", "src"),
            ("form", "action"),
            ("body", "background"),
            ("frame", "src"),
            ("link", "href"),
            ("meta", "url"), # <meta http-equiv="refresh" content="5; url=...">
            ("area", "href")]
class UrlData:
    """Represent a URL with additional check information: validity,
    error/warning/info strings, recursion level, parent document,
    download time and caching state. Scheme-specific subclasses
    override buildUrl, checkConnection, isHtml etc."""
    def __init__(self,
                 urlName,
                 recursionLevel,
                 parentName = None,
                 baseRef = None,
                 line = 0, _time = 0):
        """Store the given data; no network activity happens here."""
        self.urlName = urlName
        self.recursionLevel = recursionLevel
        self.parentName = parentName
        self.baseRef = baseRef
        self.errorString = "Error"
        self.validString = "Valid"
        self.warningString = None
        self.infoString = None
        self.valid = 1
        self.url = None
        self.line = line
        self.time = _time
        self.cached = 0
        self.urlConnection = None
    def setError(self, s):
        """Mark this URL as invalid with the given reason."""
        self.valid=0
        self.errorString = "Error: " + s
    def setValid(self, s):
        """Mark this URL as valid with the given reason."""
        self.valid=1
        self.validString = "Valid: " + s
    def isHtml(self):
        # base urls are not HTML; subclasses override this
        # (a second, identical definition of this method further down
        # was removed as dead code)
        return 0
    def setWarning(self, s):
        """Append s to the warning string."""
        if self.warningString:
            self.warningString = self.warningString+"\n" + s
        else:
            self.warningString = s
    def setInfo(self, s):
        """Append s to the info string."""
        if self.infoString:
            self.infoString = self.infoString+"\n"+s
        else:
            self.infoString = s
    def copyFrom(self, urlData):
        """Copy the check result of another (cached) UrlData object."""
        self.errorString = urlData.errorString
        self.validString = urlData.validString
        self.warningString = urlData.warningString
        self.infoString = urlData.infoString
        self.valid = urlData.valid
        self.time = urlData.time
    def buildUrl(self):
        """Construct the absolute url from urlName, baseRef and
        parentName; normalize the host name to lowercase.
        May raise an exception for malformed urls."""
        if self.baseRef:
            self.url = urlparse.urljoin(self.baseRef, self.urlName)
        elif self.parentName:
            self.url = urlparse.urljoin(self.parentName, self.urlName)
        else:
            self.url = self.urlName
        self.urlTuple = urlparse.urlparse(self.url)
        # make host lowercase
        self.urlTuple = (self.urlTuple[0],string.lower(self.urlTuple[1]),
                         self.urlTuple[2],self.urlTuple[3],self.urlTuple[4],
                         self.urlTuple[5])
        self.url = urlparse.urlunparse(self.urlTuple)
    def logMe(self, config):
        """Pass this URL to the logger if it is invalid, or if verbose
        resp. warning logging asks for it."""
        if config["verbose"] or not self.valid or \
           (self.warningString and config["warnings"]):
            config.log_newUrl(self)
    def check(self, config):
        """Full check: syntax, cache, filter, connection, recursion."""
        Config.debug(Config.DebugDelim+"Checking\n"+str(self)+"\n"+\
                     Config.DebugDelim)
        # check syntax
        Config.debug("DEBUG: checking syntax\n")
        if not self.urlName or self.urlName=="":
            self.setError("URL is null or empty")
            self.logMe(config)
            return
        try: self.buildUrl()
        except:
            type, value = sys.exc_info()[:2]
            self.setError(str(value))
            self.logMe(config)
            return
        # check the cache
        Config.debug("DEBUG: checking cache\n")
        if config.urlCache_has_key(self.getCacheKey()):
            self.copyFrom(config.urlCache_get(self.getCacheKey()))
            self.cached = 1
            self.logMe(config)
            return
        # apply filter
        Config.debug("DEBUG: checking filter\n")
        if config["strict"] and self.isExtern(config):
            self.setWarning("outside of domain filter, checked only syntax")
            self.logMe(config)
            return
        # check connection
        Config.debug("DEBUG: checking connection\n")
        try:
            self.checkConnection(config)
            if self.urlTuple and config["anchors"]:
                self.checkAnchors(self.urlTuple[5])
        except:
            type, value = sys.exc_info()[:2]
            self.setError(str(value))
        # check recursion
        Config.debug("DEBUG: checking recursion\n")
        if self.allowsRecursion(config):
            self.parseUrl(config)
        self.closeConnection()
        self.logMe(config)
        self.putInCache(config)
    def closeConnection(self):
        """Close and release the connection object."""
        # brute force closing
        try: self.urlConnection.close()
        except: pass
        # release variable for garbage collection
        self.urlConnection = None
    def putInCache(self, config):
        """Store the check result in the url cache."""
        cacheKey = self.getCacheKey()
        if cacheKey and not self.cached:
            config.urlCache_set(cacheKey, self)
            self.cached = 1
    def getCacheKey(self):
        """The normalized absolute url is the cache key.

        Relies on buildUrl() having set self.urlTuple."""
        if self.urlTuple:
            return urlparse.urlunparse(self.urlTuple)
        return None
    def checkConnection(self, config):
        """Default connection check: just open the url."""
        self.urlConnection = urllib.urlopen(self.url)
    def allowsRecursion(self, config):
        """Return true if this document should be parsed for links."""
        return self.valid and \
               self.isHtml() and \
               not self.cached and \
               self.recursionLevel < config["recursionlevel"] and \
               not self.isExtern(config)
    def checkAnchors(self, anchor):
        """Search the document for an <a name="..."> matching the given
        anchor; add a warning if none is found."""
        if not (anchor!="" and self.isHtml() and self.valid):
            return
        for cur_anchor,line in self.searchInForTag(self.getContent(), ("a", "name")):
            if cur_anchor == anchor:
                return
        self.setWarning("anchor #"+anchor+" not found")
    def isExtern(self, config):
        """Match the url against the configured intern/extern patterns;
        return 1 for extern urls, 0 for intern ones."""
        if len(config["externlinks"])==0 and len(config["internlinks"])==0:
            return 0
        # deny and allow external checking
        Config.debug(self.url)
        if config["allowdeny"]:
            # intern patterns take precedence
            for pat in config["internlinks"]:
                if pat.search(self.url):
                    return 0
            for pat in config["externlinks"]:
                if pat.search(self.url):
                    return 1
        else:
            # extern patterns take precedence
            for pat in config["externlinks"]:
                if pat.search(self.url):
                    return 1
            for pat in config["internlinks"]:
                if pat.search(self.url):
                    return 0
        return 1
    def getContent(self):
        """Precondition: urlConnection is an opened URL.
        Return the document with HTML comments stripped; the download
        time is recorded in self.time."""
        t = time.time()
        data = StringUtil.stripHtmlComments(self.urlConnection.read())
        self.time = time.time() - t
        return data
    def parseUrl(self, config):
        """Parse the document for links and append them to the queue."""
        Config.debug(Config.DebugDelim+"Parsing recursively into\n"+\
                     str(self)+"\n"+Config.DebugDelim)
        data = self.getContent()
        # search for a possible base reference
        bases = self.searchInForTag(data, ("base", "href"))
        baseRef = None
        if len(bases)>=1:
            baseRef = bases[0][0]
        if len(bases)>1:
            self.setWarning("more than one base tag found")
        # search for tags and add found tags to URL queue
        for tag in LinkTags:
            urls = self.searchInForTag(data, tag)
            Config.debug("DEBUG: "+str(tag)+" urls="+str(urls)+"\n")
            for _url,line in urls:
                config.appendUrl(GetUrlDataFrom(_url,
                    self.recursionLevel+1, self.url, baseRef, line))
    def searchInForTag(self, data, tag):
        """Return (url, linenumber) tuples for all occurrences of the
        (tagname, attribute) pair in data. Matched tags are cut out of
        data to find subsequent matches; cutofflines compensates the
        line numbers for the removed text."""
        _urls = []
        _prefix="<\s*"+tag[0]+"\s+[^>]*?"+tag[1]+"\s*=\s*"
        _suffix="[^>]*>"
        # first quoted, then unquoted attribute values
        _patterns = [re.compile(_prefix+"\"([^\"]+)\""+_suffix, re.I),
                     re.compile(_prefix+"([^\s>]+)" +_suffix, re.I)]
        cutofflines = 0
        for _pattern in _patterns:
            while 1:
                _match = _pattern.search(data)
                if not _match: break
                # need to strip optional ending quotes for the <meta url=> tag
                linenumberbegin = StringUtil.getLineNumber(data, _match.start(0))
                linenumberend = StringUtil.getLineNumber(data, _match.end(0))
                cutofflines = cutofflines + linenumberend - linenumberbegin
                _urls.append((string.strip(StringUtil.rstripQuotes(_match.group(1))),
                              linenumberbegin + cutofflines))
                data = data[:_match.start(0)] + data[_match.end(0):]
        return _urls
    def __str__(self):
        # repr() instead of the deprecated backtick operator
        return "urlname="+repr(self.urlName)+"\nparentName="+repr(self.parentName)+\
               "\nbaseRef="+repr(self.baseRef)+"\ncached="+repr(self.cached)+\
               "\nrecursionLevel="+repr(self.recursionLevel)+\
               "\nurlConnection="+str(self.urlConnection)
from FileUrlData import FileUrlData
from FtpUrlData import FtpUrlData
from GopherUrlData import GopherUrlData
from HttpUrlData import HttpUrlData
from HttpsUrlData import HttpsUrlData
from JavascriptUrlData import JavascriptUrlData
from MailtoUrlData import MailtoUrlData
from TelnetUrlData import TelnetUrlData
def GetUrlDataFrom(urlName,
                   recursionLevel,
                   parentName = None,
                   baseRef = None, line = 0, _time = 0):
    """Factory function: return the UrlData subclass instance matching
    the url scheme. Urls without a recognizable scheme are assumed to
    be local files."""
    # search for the absolute url to inspect for a scheme
    name=""
    if urlName and ":" in urlName:
        name = string.lower(urlName)
    elif baseRef and ":" in baseRef:
        name = string.lower(baseRef)
    elif parentName and ":" in parentName:
        name = string.lower(parentName)
    # test scheme: one dispatch table instead of eight regular
    # expressions recompiled on every call; the prefix comparison is
    # equivalent to the former anchored searches
    for scheme, klass in [("http:", HttpUrlData),
                          ("ftp:", FtpUrlData),
                          ("file:", FileUrlData),
                          ("telnet:", TelnetUrlData),
                          ("mailto:", MailtoUrlData),
                          ("gopher:", GopherUrlData),
                          ("javascript:", JavascriptUrlData),
                          ("https:", HttpsUrlData)]:
        if name[:len(scheme)] == scheme:
            return klass(urlName, recursionLevel, parentName, baseRef, line, _time)
    # assume local file
    return FileUrlData(urlName, recursionLevel, parentName, baseRef, line, _time)

15
linkcheck/__init__.py Normal file
View file

@ -0,0 +1,15 @@
# __init__.py for this module
import Config,UrlData,OutputReader,sys
def checkUrls(config = None):
    """Check all URLs queued in the given configuration and log the
    results through the configured logger(s).

    config defaults to a fresh Config.Configuration(). The default is
    now created per call: the old default argument was evaluated only
    once at import time, so all callers without an argument shared
    (and mutated) the very same configuration object."""
    if config is None:
        config = Config.Configuration()
    config.log_init()
    try:
        while not config.finished():
            if config.hasMoreUrls():
                config.checkUrl(config.getUrl())
    except KeyboardInterrupt:
        # finish the running check and close the logs before exiting
        config.finish()
        config.log_endOfOutput()
        sys.exit(1) # this is not good(tm)
    config.log_endOfOutput()

247
linkchecker Executable file
View file

@ -0,0 +1,247 @@
#!/usr/bin/env python
import getopt,sys,re,string
if sys.version[:5] < "1.5.2":
print "This program requires Python 1.5.2 or later."
sys.exit(1)
# add the path to linkcheck module
sys.path.insert(0, "/usr/share/linkchecker")
import linkcheck
Usage = """USAGE\tpylice [options] file_or_url...
OPTIONS
-a, --anchors
Check anchor references. Default is don't check anchors.
-D, --debug
Print additional debugging information.
-e regex, --extern=regex
Assume urls that match the given expression as extern.
Only intern HTTP links are checked recursively.
-f file, --config=file
Use file as configuration file. Pylice first searches ~/.pylicerc
and then /etc/pylicerc (under Windows <path-to-program>\\pylicerc).
-i regex, --intern=regex
Assume urls that match the given expression as intern.
-h, --help
Help me! Print usage information for this program.
-l, --allowdeny
Swap checking order to intern/extern. Default checking order
is extern/intern.
-o name, --output=name
Specify output as """+linkcheck.Config.LoggerKeys+""".
Default is text.
-W name, --file-output=name
Same as output, but write to a file pylice-out.<name>.
If the file already exists, it is overwritten.
You can specify this option more than once.
Default is no file output.
-p pwd, --password=pwd
Try given password for HTML and FTP authorization.
Default is 'joe@'. See -u.
-P host[:port], --proxy=host[:port]
Use specified proxy for HTTP requests.
Standard port is 8080. Default is to use no proxy.
-q, --quiet
Quiet operation. This is only useful with -W.
-r depth, --recursion-level=depth
Check recursively all links up to given depth (depth >= 0).
Default depth is 1.
-R, --robots-txt
Obey the robots exclusion standard.
-s, --strict
Check only syntax of extern links, do not try to connect to them.
-t num, --threads=num
Generate no more than num threads. Default number of threads is 5.
To disable threading specify a non-positive number.
-u name, --user=name
Try given username for HTML and FTP authorization.
Default is 'anonymous'. See -p.
-V, --version
Print version and exit.
-v, --verbose
Log all checked URLs (implies -w). Default is to log only invalid
URLs.
-w, --warnings
Log warnings.
"""
Notes = """NOTES
o Pylice assumes an http:// resp. ftp:// link when a commandline URL
starts with "www." resp. "ftp.".
You can also give local files as arguments.
o If you have your system configured to automatically establish a
connection to the internet (e.g. with diald), it will connect when
checking links not pointing to your local host.
Use the -s and -i options to prevent this (see EXAMPLES).
o Javascript and https links are currently ignored
o If your platform does not support threading, pylice assumes -t0
"""
Examples = """EXAMPLES
o pylice -v -o html -r2 -s -i treasure.calvinsplayground.de \\
http://treasure.calvinsplayground.de/~calvin/ > sample.html
generates the included sample.html file
o Local files and syntactic sugar on the command line:
pylice c:\\temp\\test.html
pylice ../bla.html
pylice www.myhomepage.de
pylice -r0 ftp.linux.org
"""
def printVersion():
print Config.AppInfo
sys.exit(0)
def printHelp():
print Usage
print Notes
print Examples
sys.exit(0)
def printUsage(msg):
    """Report a usage error on stderr and exit with status 1."""
    message = "Error: "+str(msg)+"\nType pylice -h for help\n"
    sys.stderr.write(message)
    sys.exit(1)
# Read command line arguments
try:
    # Note: cut out the name of the script
    options, args = getopt.getopt(sys.argv[1:], "aDe:f:hi:lP:o:p:qr:Rst:u:VvwW:",
                 ["anchors",
                  "config=",
                  "debug",
                  "extern=",
                  "file-output=",
                  "help",
                  "intern=",
                  "allowdeny",
                  "output=",
                  "proxy=",
                  "password=",
                  "quiet",
                  "recursion-level=",
                  "robots-txt",
                  "strict",
                  "threads=",
                  "user=",
                  "version",
                  "verbose",
                  "warnings"])
except getopt.error:
    type, value = sys.exc_info()[:2]
    printUsage(value)
# apply configuration
config = linkcheck.Config.Configuration()
try:
    # collect all configuration files given with -f/--config (in order)
    # before reading them; the remaining options are applied afterwards
    # so that the command line overrides the configuration files
    configfile = []
    for opt,arg in options:
        if opt=="-f" or opt=="--config":
            configfile.append(arg)
    config.read(configfile)
except:
    type, value = sys.exc_info()[:2]
    printUsage(value)
# apply options and arguments
# constructAuth counts -u and -p; both must be given together
constructAuth = 0
for opt,arg in options:
    if opt=="-a" or opt=="--anchors":
        config["anchors"] = 1
    elif opt=="-D" or opt=="--debug":
        linkcheck.Config.DebugFlag = 1
    elif opt=="-e" or opt=="--extern":
        config["externlinks"].append(re.compile(arg))
    elif opt=="-h" or opt=="--help":
        printHelp()
    elif opt=="-o" or opt=="--output":
        if linkcheck.Config.Loggers.has_key(arg):
            config["log"] = linkcheck.Config.Loggers[arg]()
        else:
            printUsage("Legal output arguments are "+linkcheck.Config.LoggerKeys+".")
    elif opt=="-W" or opt=="--file-output":
        if linkcheck.Config.Loggers.has_key(arg):
            config["fileoutput"].append(linkcheck.Config.Loggers[arg](open("pylice-out."+arg, "w")))
        else:
            printUsage("Legal output arguments are "+linkcheck.Config.LoggerKeys+".")
    elif opt=="-i" or opt=="--intern":
        config["internlinks"].append(re.compile(arg))
    elif opt=="-l" or opt=="--allowdeny":
        config["allowdeny"] = 1
    elif opt=="-P" or opt=="--proxy":
        # accept both "host" and "host:port"
        proxy = re.compile("(.+):(.+)").match(arg)
        if proxy:
            config["proxy"] = proxy.group(1)
            config["proxyport"] = int(proxy.group(2))
        else:
            config["proxy"] = arg
    elif opt=="-p" or opt=="--password":
        config["password"]=arg
        constructAuth=constructAuth+1
    elif opt=="-q" or opt=="--quiet":
        config["quiet"]=1
    elif opt=="-r" or opt=="--recursion-level":
        if int(arg) >= 0:
            config["recursionlevel"] = int(arg)
        else:
            printUsage("Illegal recursion-level number: "+arg)
    elif opt=="-R" or opt=="--robots-txt":
        config["robotstxt"] = 1
    elif opt=="-s" or opt=="--strict":
        config["strict"] = 1
    elif opt=="-t" or opt=="--threads":
        # NOTE(review): -t only takes effect when threading is already
        # enabled in the configuration -- confirm this gating is intended
        num = int(arg)
        if config["threads"]:
            if num>0:
                config.enableThreading(num)
            else:
                config.disableThreading()
    elif opt=="-u" or opt=="--user":
        config["user"] = arg
        constructAuth=constructAuth+1
    elif opt=="-V" or opt=="--version":
        printVersion()
    elif opt=="-v" or opt=="--verbose":
        # verbose implies warnings
        config["verbose"] = 1
        config["warnings"] = 1
    elif opt=="-w" or opt=="--warnings":
        config["warnings"] = 1
if constructAuth and constructAuth!=2:
    sys.stderr.write("Warning: try to give me both Username and Password\n")
if len(args)==0:
    printUsage("no files or urls given")
# syntactic sugar: prepend the scheme for command line urls that start
# with "www." or "ftp.", then queue everything and start checking
for url in args:
    if not (":" in url):
        if re.compile("^ftp\.").match(url):
            url = "ftp://"+url
        elif re.compile("^www\.").match(url):
            url = "http://"+url
    config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0))
linkcheck.checkUrls(config)

8
linkchecker.bat Normal file
View file

@ -0,0 +1,8 @@
@echo off
rem === adjust vars below ===
set PYTHON=c:\progra~1\python\python.exe
set LINKCHECKER=c:\progra~1\linkchecker-1.1.0
rem === end configure ===
%PYTHON% %LINKCHECKER%\pylice %1 %2 %3 %4 %5 %6 %7 %8 %9

24
linkcheckerrc Normal file
View file

@ -0,0 +1,24 @@
# sample resource file
# see linkchecker -h for help on these options
[output]
#debug=1
#log=colored
#verbose=1
#warnings=1
#quiet=0
[checking]
#threads=15
#anchors=1
#externlinks=
#internlinks=
#allowdeny=1
#password=calvin@
#user=anonymous
#recursionlevel=1
#robotstxt=1
#strict=1
#proxy=
#proxyport=8080

26
parsetest.py Normal file
View file

@ -0,0 +1,26 @@
import sys,StringIO,LinkChecker
def linkcheck(urls):
    """Check a list of http://, file:// etc. urls with verbose logging
    and warnings enabled; results go to the configured logger
    (standard output by default)."""
    config = LinkChecker.Config.Configuration()
    config["verbose"]=1
    config["warnings"]=1
    # no more options, use defaults
    # add urls
    for url in urls:
        config.appendUrl(LinkChecker.UrlData.GetUrlDataFrom(url, 0))
    # check it
    LinkChecker.checkUrls(config)
# run a check with stdout redirected into a StringIO buffer, then feed
# the captured text back through OutputReader and print the re-parsed
# UrlData objects -- a round-trip test of the text output format
old_stdout = sys.stdout
sys.stdout = StringIO.StringIO()
linkcheck(['http://fsinfo.cs.uni-sb.de/~calvin'])
sys.stdout.seek(0)
reader = LinkChecker.OutputReader.OutputReader()
old_stdout.write(sys.stdout.getvalue())
result = reader.parse(sys.stdout)
sys.stdout = old_stdout
for url in result:
    print str(url)

1
test/.cvsignore Normal file
View file

@ -0,0 +1 @@
*.result

6
test/base1.html Normal file
View file

@ -0,0 +1,6 @@
<!-- base testing -->
<base target="_top">
<a href
=
"file:/etc">

3
test/base2.html Normal file
View file

@ -0,0 +1,3 @@
<base href="file:/etc/">
<a href="passwd">

2
test/base3.html Normal file
View file

@ -0,0 +1,2 @@
<base href="http://treasure.calvinsplayground.de/~calvin/">
<a href="index.shtml">

4
test/frames.html Normal file
View file

@ -0,0 +1,4 @@
<frameset border="0" frameborder="0" framespacing="0">
<frame name="top" src="test1.html" frameborder="0">
<frame name="bottom" src="test2.html" frameborder="0">
</frameset>

17
test/test1.html Normal file
View file

@ -0,0 +1,17 @@
Just some HTTP links
<a href="http://www.garantiertnixgutt.bla">
<a href="http://www.heise.de">
<a href="http:/www.heise.de">
<a href="http:www.heise.de">
<a href="http://">
<a href="http:/">
<a href="http:">
<a name="iswas"> <!-- anchor for test2.html -->
<a href=http://slashdot.org/>
<a href="http://treasure.calvinsplayground.de/~calvin/software/#isnix">
<a href="https://www.heise.de"> <!-- ignore -->
<a href="HtTP://WWW.hEIsE.DE">
<a href="HTTP://WWW.HEISE.DE"> <!-- should be cached -->
<!-- <a href=http://nocheckin> -->
<!-- check the parser at end of file -->
<a href="g

23
test/test2.html Normal file
View file

@ -0,0 +1,23 @@
<meta http-equiv="refresh" content="5; url=http://localhost">
<a href="hutzli:nixgutt">
<a href="javascript:loadthis()">
<a href="file:///etc/group">
<a href="file://etc/group">
<a href="file:/etc/group">
<a href="file:etc/group">
<a href="file:/etc/">
<a href="test1.html">
<a href="test1.html#isnix">
<a href="test1.html#iswas">
<a href=mailto:calvin@localhost?subject=Hallo Pfuscher>
<a href=mailto:Bastian Kleineidam <calvin@host1?foo=bar>>
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>">
<a href="mailto:calvin@host3">
<a href="mailto:">
<a href="telnet:localhost">
<a href="telnet:">
<a href="ftp:/treasure.calvinsplayground.de/pub">
<a href="ftp://treasure.calvinsplayground.de/pub">
<a href="ftp://treasure.calvinsplayground.de//pub">
<a href="ftp://treasure.calvinsplayground.de////////pub">
<a href="ftp:///treasure.calvinsplayground.de/pub">

22
tests/test.py Executable file
View file

@ -0,0 +1,22 @@
#!/opt/python/bin/python1.5
import DNS
# automatically load nameserver(s) from /etc/resolv.conf
# (works on unix - on others, YMMV)
DNS.ParseResolvConf()
# lets do an all-in-one request
# set up the request object
r = DNS.DnsRequest(name='munnari.oz.au',qtype='A')
# do the request
a=r.req()
# and do a pretty-printed output
a.show()
# now lets setup a reusable request object
r = DNS.DnsRequest(qtype='ANY')
# fixed typo: the root server domain is .net, not .nex
res = r.req("a.root-servers.net",qtype='ANY')
res.show()
res = r.req("proxy.connect.com.au")
res.show()

17
tests/test2.py Executable file
View file

@ -0,0 +1,17 @@
#!/opt/python/bin/python1.5
import DNS
# automatically load nameserver(s) from /etc/resolv.conf
# (works on unix - on others, YMMV)
DNS.ParseResolvConf()
# MX record lookup
r=DNS.Request(qtype='mx')
res = r.req('connect.com.au')
res.show()
# SOA record lookup
r=DNS.Request(qtype='soa')
res = r.req('connect.com.au')
res.show()
# reverse lookup of an IP address
print DNS.revlookup('192.189.54.17')

13
tests/test3.py Executable file
View file

@ -0,0 +1,13 @@
#!/opt/python/bin/python1.5
import DNS
# automatically load nameserver(s) from /etc/resolv.conf
# (works on unix - on others, YMMV)
DNS.ParseResolvConf()
# web server reliability, the NT way. *snigger*
res = r.req('www.microsoft.com',qtype='A')
# res.answers is a list of dictionaries of answers
print len(res.answers),'different A records'
# each of these has an entry for 'data', which is the result.
print map(lambda x:x['data'], res.answers)

7
tests/test4.py Executable file
View file

@ -0,0 +1,7 @@
#!/opt/python/bin/python
import DNS
# load nameserver(s) from /etc/resolv.conf, then print the MX records
# of the given domain as a list of (preference, hostname) tuples
DNS.ParseResolvConf()
print DNS.mxlookup("connect.com.au")

52
tests/test5.py Executable file
View file

@ -0,0 +1,52 @@
#!/opt/python/bin/python
import DNS
DNS.ParseResolvConf()
def Error(mesg):
    """Print the script name and the error message, then exit(1)."""
    import sys
    print sys.argv[0],"ERROR:"
    print mesg
    sys.exit(1)
def main():
    """Look up the nameservers of the domain given on the command line
    and verify each of them against the primary."""
    import sys
    if len(sys.argv) != 2:
        Error("usage: %s somedomain.com"%sys.argv[0])
    domain = sys.argv[1]
    nslist = GetNS(domain)
    print "According to the primary, the following are nameservers for this domain"
    for ns in nslist:
        print " ",ns
        CheckNS(ns,domain)
def GetNS(domain):
import DNS
r = DNS.Request(domain,qtype='SOA').req()
if r.header['status'] != 'NOERROR':
Error("received status of %s when attempting to look up SOA for domain"%
(r.header['status']))
primary,email,serial,refresh,retry,expire,minimum = r.answers[0]['data']
print "Primary nameserver for domain %s is: %s"%(domain,primary)
r = DNS.Request(domain,qtype='NS',server=primary,aa=1).req()
if r.header['status'] != 'NOERROR':
Error("received status of %s when attempting to query %s for NSs"%
(r.header['status']))
if r.header['aa'] != 1:
Error("primary NS %s doesn't believe that it's authoritative!"% primary)
nslist = map(lambda x:x['data'], r.answers)
return nslist
def CheckNS(nameserver,domain):
r = DNS.Request(domain,qtype='SOA',server=nameserver,aa=1).req()
if r.header['status'] != 'NOERROR':
Error("received status of %s when attempting to query %s for NS"%
(r.header['status']))
if r.header['aa'] != 1:
Error("NS %s doesn't believe that it's authoritative!"% nameserver)
primary,email,serial,refresh,retry,expire,minimum = r.answers[0]['data']
print " NS has serial",serial[1]
if __name__ == "__main__":
main()