mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Initial revision
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@5 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
commit
0329ca7682
100 changed files with 9413 additions and 0 deletions
3
.cvsignore
Normal file
3
.cvsignore
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
build-stamp
|
||||
sample.html
|
||||
linkchecker-out.*
|
||||
148
ChangeLog
Normal file
148
ChangeLog
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
25.2.2000
|
||||
* changed the name to LinkChecker. My old Java LinkChecker will
|
||||
disappear because I do not maintain it anymore.
|
||||
|
||||
21.2.2000
|
||||
* add -q, --quiet option
|
||||
* convert all url host to lowercase
|
||||
* log the download time for urls
|
||||
|
||||
20.2.2000
|
||||
* add Graph Modelling Language (GML) output for sitemaps
|
||||
* add SQL output
|
||||
|
||||
19.2.2000
|
||||
* second try with HTTP/1.1: additionally close response
|
||||
* remove deprecated options
|
||||
* new option -W, --file-output
|
||||
* fix typo for --password option
|
||||
|
||||
18.2.2000
|
||||
* add "-" to mail adress syntax (Baz <B.Rowlingson@lancaster.ac.uk>)
|
||||
* fix typo in pylice (David J. MacKenzie <djm@web.us.uu.net>)
|
||||
|
||||
10.2.2000 Version 0.8.0
|
||||
* clean the CVS dir
|
||||
* fixes for configuration
|
||||
* first version of configuration parsing
|
||||
|
||||
9.2.2000
|
||||
* do not pass anchor in HTTP requests
|
||||
* fixes for configuration parsing
|
||||
|
||||
8.2.2000
|
||||
* fixed bad finished_NoThreads function
|
||||
* backed out HTTP/1.1 support. This library is buggy and
|
||||
does not close some filehandles. Eventually you will get
|
||||
a "Too many open files" error
|
||||
* strip whitespace from parsed urls
|
||||
|
||||
6.2.2000
|
||||
* fixed some bugs, the test suite is running again
|
||||
|
||||
5.2.2000
|
||||
* made "LinkChecker" module
|
||||
* configuration is dynamic; no more class variables
|
||||
* print line number
|
||||
* more agressive closing of filehandles
|
||||
|
||||
27.1.2000 Version 0.7.0
|
||||
* put pylicerc in /etc for .deb package
|
||||
* HTTP/1.1 support with httplib.py from Greg Stein
|
||||
* DNS MX lookup for mail adresses
|
||||
use the DNS module from Guido van Rossum and Anthony Baxter
|
||||
MX lookup was a suggestion to LinkChecker from
|
||||
Jimmy Engelbrecht <jimmy@e.kth.se>
|
||||
|
||||
26.1.2000 Version 0.6.2
|
||||
* refined HTML link syntax to handle non-quoted URLs
|
||||
* fix: set urlTuple to None if we cannot check anchors
|
||||
* fixed anchor checking again
|
||||
|
||||
25.1.2000 Version 0.6.1
|
||||
* fixed the HTML link syntax
|
||||
|
||||
24.1.2000
|
||||
* fix: -e option did not work properly
|
||||
* fix: reenabled LinkChecker Online, updated to 0.6.0
|
||||
|
||||
21.1.2000 Version 0.6.0
|
||||
* fix: add hostname for relative redirections
|
||||
* Added TODO list
|
||||
|
||||
20.1.2000
|
||||
* Added documentation for the LinkChecker class
|
||||
|
||||
19.1.2000
|
||||
* HTTP Proxy support
|
||||
* CGI logging
|
||||
|
||||
18.1.2000 Version 0.5.0
|
||||
* anchor checking in local HTML files
|
||||
* configuration file
|
||||
* HTTP Authorization support
|
||||
* Send HTTP HEAD method to check and GET method to get contents
|
||||
* Still missing: Proxy support (including HTTP status code 305)
|
||||
|
||||
17.1.2000
|
||||
* cut parameter, query and fragment of local file names
|
||||
* limit number of redirections to 5
|
||||
|
||||
14.1.2000 Version 0.4.3
|
||||
* pylice.bat fix: now it really works
|
||||
* fix for local Windows file arguments
|
||||
|
||||
14.1.2000 Version 0.4.2
|
||||
* StringUtil.indentWith: use string multiplying
|
||||
* Still missing: HTTP authorization and Proxy support
|
||||
* pylice.bat fix: pass parameters
|
||||
|
||||
13.1.2000 Version 0.4.1
|
||||
* Windows python.bat script
|
||||
* installation updates
|
||||
* additional .zip package for Windows
|
||||
|
||||
12.1.2000 Version 0.4.0
|
||||
* fixed LinkChecker.NumThreads setting: if the platform
|
||||
does not support threading, it is disabled automagically
|
||||
* robots.txt parsing
|
||||
* split up UrlData.py
|
||||
* simplified option parsing
|
||||
* strip optional quotes from urls
|
||||
* use quit() not close() to disconnect from FTP servers
|
||||
|
||||
11.1.2000 Version 0.3.0
|
||||
* try to finger for mailto: links
|
||||
* try to connect for telnet: links
|
||||
* removed time.sleep(1) commands, they are not necessary
|
||||
* restrict CGI to recursion level 3
|
||||
* make UrlCache and RobotsTxtCache thread safe
|
||||
* fixed the 'No more open files' bug by closing all connections
|
||||
* fixed thread synchronization in LinkChecker while loop
|
||||
* you can specify -t 0 on the commandline to disable threading
|
||||
* STILL MISSING:
|
||||
HTTP authorization, Proxy and robots.txt parsing
|
||||
|
||||
10.1.2000 Version 0.2.0
|
||||
* configure option to disable threading: LinkChecker.threadsupport
|
||||
* do not rely on self.mime in HttpUrlData, this could be None
|
||||
* flush stdout after each log entry
|
||||
* use LinkChecker.User and LinkChecker.Password in FTP connections
|
||||
* make sure redirection is not cyclic
|
||||
|
||||
9.1.2000 Version 0.1.0
|
||||
* HTTP request
|
||||
* FTP request
|
||||
* fixed MaxRecursionLevel setting
|
||||
* fixed name clash of variable and function warning
|
||||
* ColoredLogger
|
||||
* small doc changes
|
||||
* CGI and HTML files for LinkChecker Online,
|
||||
but I still have to install Python on my http server
|
||||
(will try this tomorrow)
|
||||
|
||||
8.1.2000
|
||||
* Properties, Threader, LinkChecker, UrlData, Logging
|
||||
|
||||
7.1.2000 Version 0.0.1
|
||||
* Option processing
|
||||
215
DNS/Base.py
Normal file
215
DNS/Base.py
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
# $Id$
|
||||
import sys
|
||||
import getopt
|
||||
import socket
|
||||
import string
|
||||
import DNS,DNS.Lib,DNS.Type,DNS.Class,DNS.Opcode
|
||||
#import asyncore
|
||||
|
||||
defaults= { 'protocol':'udp', 'port':53, 'opcode':DNS.Opcode.QUERY,
|
||||
'qtype':DNS.Type.A, 'rd':1, 'timing':1 }
|
||||
|
||||
defaults['server']=[]
|
||||
|
||||
def ParseResolvConf():
|
||||
"parses the /etc/resolv.conf file and sets defaults for name servers"
|
||||
import string
|
||||
global defaults
|
||||
lines=open("/etc/resolv.conf").readlines()
|
||||
for line in lines:
|
||||
string.strip(line)
|
||||
if line[0]==';' or line[0]=='#':
|
||||
continue
|
||||
fields=string.split(line)
|
||||
if fields[0]=='domain':
|
||||
defaults['domain']=fields[1]
|
||||
if fields[0]=='search':
|
||||
pass
|
||||
if fields[0]=='options':
|
||||
pass
|
||||
if fields[0]=='sortlist':
|
||||
pass
|
||||
if fields[0]=='nameserver':
|
||||
defaults['server'].append(fields[1])
|
||||
|
||||
|
||||
|
||||
class DnsRequest:
|
||||
def __init__(self,*name,**args):
|
||||
self.donefunc=None
|
||||
self.async=None
|
||||
self.defaults = {}
|
||||
self.argparse(name,args)
|
||||
self.defaults = self.args
|
||||
|
||||
def argparse(self,name,args):
|
||||
if not name and self.defaults.has_key('name'):
|
||||
args['name'] = self.defaults['name']
|
||||
if type(name) is type(""):
|
||||
args['name']=name
|
||||
else:
|
||||
if len(name) == 1:
|
||||
if name[0]:
|
||||
args['name']=name[0]
|
||||
for i in defaults.keys():
|
||||
if not args.has_key(i):
|
||||
if self.defaults.has_key(i):
|
||||
args[i]=self.defaults[i]
|
||||
else:
|
||||
args[i]=defaults[i]
|
||||
if type(args['server']) == type(''):
|
||||
args['server'] = [args['server']]
|
||||
self.args=args
|
||||
|
||||
def socketInit(self,a,b):
|
||||
import socket
|
||||
self.s = socket.socket(a,b)
|
||||
|
||||
def processUDPReply(self):
|
||||
import time
|
||||
self.reply = self.s.recv(1024)
|
||||
self.time_finish=time.time()
|
||||
self.args['server']=self.ns
|
||||
return self.processReply()
|
||||
|
||||
def processTCPReply(self):
|
||||
import time
|
||||
self.f = self.s.makefile('r')
|
||||
header = self.f.read(2)
|
||||
if len(header) < 2:
|
||||
raise DNS.Error,'EOF'
|
||||
count = DNS.Lib.unpack16bit(header)
|
||||
self.reply = self.f.read(count)
|
||||
if len(self.reply) != count:
|
||||
raise DNS.Error,'incomplete reply'
|
||||
self.time_finish=time.time()
|
||||
self.args['server']=self.ns
|
||||
return self.processReply()
|
||||
|
||||
def processReply(self):
|
||||
import time
|
||||
self.args['elapsed']=(self.time_finish-self.time_start)*1000
|
||||
u = DNS.Lib.Munpacker(self.reply)
|
||||
r=DNS.Lib.DnsResult(u,self.args)
|
||||
r.args=self.args
|
||||
#self.args=None # mark this DnsRequest object as used.
|
||||
return r
|
||||
#### TODO TODO TODO ####
|
||||
if protocol == 'tcp' and qtype == DNS.Type.AXFR:
|
||||
while 1:
|
||||
header = f.read(2)
|
||||
if len(header) < 2:
|
||||
print '========== EOF =========='
|
||||
break
|
||||
count = DNS.Lib.unpack16bit(header)
|
||||
if not count:
|
||||
print '========== ZERO COUNT =========='
|
||||
break
|
||||
print '========== NEXT =========='
|
||||
reply = f.read(count)
|
||||
if len(reply) != count:
|
||||
print '*** Incomplete reply ***'
|
||||
break
|
||||
u = DNS.Lib.Munpacker(reply)
|
||||
DNS.Lib.dumpM(u)
|
||||
|
||||
def conn(self):
|
||||
self.s.connect((self.ns,self.port))
|
||||
|
||||
def req(self,*name,**args):
|
||||
import time,sys
|
||||
self.argparse(name,args)
|
||||
#if not self.args:
|
||||
# raise DNS.Error,'reinitialize request before reuse'
|
||||
protocol = self.args['protocol']
|
||||
self.port = self.args['port']
|
||||
opcode = self.args['opcode']
|
||||
rd = self.args['rd']
|
||||
server=self.args['server']
|
||||
if type(self.args['qtype']) == type('foo'):
|
||||
try:
|
||||
qtype = eval(string.upper(self.args['qtype']), DNS.Type.__dict__)
|
||||
except (NameError,SyntaxError):
|
||||
raise DNS.Error,'unknown query type'
|
||||
else:
|
||||
qtype=self.args['qtype']
|
||||
if not self.args.has_key('name'):
|
||||
print self.args
|
||||
raise DNS.Error,'nothing to lookup'
|
||||
qname = self.args['name']
|
||||
if qtype == DNS.Type.AXFR:
|
||||
print 'Query type AXFR, protocol forced to TCP'
|
||||
protocol = 'tcp'
|
||||
#print 'QTYPE %d(%s)' % (qtype, DNS.Type.typestr(qtype))
|
||||
m = DNS.Lib.Mpacker()
|
||||
m.addHeader(0,
|
||||
0, opcode, 0, 0, rd, 0, 0, 0,
|
||||
1, 0, 0, 0)
|
||||
m.addQuestion(qname, qtype, DNS.Class.IN)
|
||||
self.request = m.getbuf()
|
||||
if protocol == 'udp':
|
||||
self.response=None
|
||||
self.socketInit(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
for self.ns in server:
|
||||
try:
|
||||
#self.s.connect((self.ns, self.port))
|
||||
self.conn()
|
||||
self.time_start=time.time()
|
||||
if not self.async:
|
||||
self.s.send(self.request)
|
||||
self.response=self.processUDPReply()
|
||||
#except socket.error:
|
||||
except None:
|
||||
continue
|
||||
break
|
||||
if not self.response:
|
||||
if not self.async:
|
||||
raise DNS.Error,'no working nameservers found'
|
||||
else:
|
||||
self.response=None
|
||||
for self.ns in server:
|
||||
try:
|
||||
self.socketInit(socket.AF_INET, socket.SOCK_STREAM)
|
||||
self.time_start=time.time()
|
||||
self.conn()
|
||||
self.s.send(DNS.Lib.pack16bit(len(self.request)) + self.request)
|
||||
self.s.shutdown(1)
|
||||
self.response=self.processTCPReply()
|
||||
except socket.error:
|
||||
continue
|
||||
break
|
||||
if not self.response:
|
||||
raise DNS.Error,'no working nameservers found'
|
||||
if not self.async:
|
||||
return self.response
|
||||
|
||||
#class DnsAsyncRequest(DnsRequest,asyncore.dispatcher_with_send):
|
||||
class DnsAsyncRequest(DnsRequest):
|
||||
def __init__(self,*name,**args):
|
||||
if args.has_key('done') and args['done']:
|
||||
self.donefunc=args['done']
|
||||
else:
|
||||
self.donefunc=self.showResult
|
||||
self.realinit(name,args)
|
||||
self.async=1
|
||||
def conn(self):
|
||||
import time
|
||||
self.connect(self.ns,self.port)
|
||||
self.time_start=time.time()
|
||||
if self.args.has_key('start') and self.args['start']:
|
||||
asyncore.dispatcher.go(self)
|
||||
def socketInit(self,a,b):
|
||||
self.create_socket(a,b)
|
||||
asyncore.dispatcher.__init__(self)
|
||||
self.s=self
|
||||
def handle_read(self):
|
||||
if self.args['protocol'] == 'udp':
|
||||
self.response=self.processUDPReply()
|
||||
if self.donefunc:
|
||||
apply(self.donefunc,(self,))
|
||||
def handle_connect(self):
|
||||
self.send(self.request)
|
||||
def handle_write(self):
|
||||
pass
|
||||
def showResult(self,*s):
|
||||
self.response.show()
|
||||
23
DNS/Class.py
Normal file
23
DNS/Class.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# CLASS values (section 3.2.4)
|
||||
|
||||
IN = 1 # the Internet
|
||||
CS = 2 # the CSNET class (Obsolete - used only for examples in
|
||||
# some obsolete RFCs)
|
||||
CH = 3 # the CHAOS class
|
||||
HS = 4 # Hesiod [Dyer 87]
|
||||
|
||||
# QCLASS values (section 3.2.5)
|
||||
|
||||
ANY = 255 # any class
|
||||
|
||||
|
||||
# Construct reverse mapping dictionary
|
||||
|
||||
_names = dir()
|
||||
classmap = {}
|
||||
for _name in _names:
|
||||
if _name[0] != '_': classmap[eval(_name)] = _name
|
||||
|
||||
def classstr(klass):
|
||||
if classmap.has_key(klass): return classmap[klass]
|
||||
else: return `klass`
|
||||
589
DNS/Lib.py
Normal file
589
DNS/Lib.py
Normal file
|
|
@ -0,0 +1,589 @@
|
|||
# Domain Name Server (DNS) interface
|
||||
#
|
||||
# See RFC 1035:
|
||||
# ------------------------------------------------------------------------
|
||||
# Network Working Group P. Mockapetris
|
||||
# Request for Comments: 1035 ISI
|
||||
# November 1987
|
||||
# Obsoletes: RFCs 882, 883, 973
|
||||
#
|
||||
# DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
|
||||
import string
|
||||
|
||||
import DNS.Type
|
||||
import DNS.Class
|
||||
import DNS.Opcode
|
||||
import DNS.Status
|
||||
|
||||
|
||||
# Low-level 16 and 32 bit integer packing and unpacking
|
||||
|
||||
def pack16bit(n):
|
||||
return chr((n>>8)&0xFF) + chr(n&0xFF)
|
||||
|
||||
def pack32bit(n):
|
||||
return chr((n>>24)&0xFF) + chr((n>>16)&0xFF) \
|
||||
+ chr((n>>8)&0xFF) + chr(n&0xFF)
|
||||
|
||||
def unpack16bit(s):
|
||||
return (ord(s[0])<<8) | ord(s[1])
|
||||
|
||||
def unpack32bit(s):
|
||||
return (ord(s[0])<<24) | (ord(s[1])<<16) \
|
||||
| (ord(s[2])<<8) | ord(s[3])
|
||||
|
||||
def addr2bin(addr):
|
||||
if type(addr) == type(0):
|
||||
return addr
|
||||
bytes = string.splitfields(addr, '.')
|
||||
if len(bytes) != 4: raise ValueError, 'bad IP address'
|
||||
n = 0
|
||||
for byte in bytes: n = n<<8 | string.atoi(byte)
|
||||
return n
|
||||
|
||||
def bin2addr(n):
|
||||
return '%d.%d.%d.%d' % ((n>>24)&0xFF, (n>>16)&0xFF,
|
||||
(n>>8)&0xFF, n&0xFF)
|
||||
|
||||
|
||||
# Packing class
|
||||
|
||||
class Packer:
|
||||
def __init__(self):
|
||||
self.buf = ''
|
||||
self.index = {}
|
||||
def getbuf(self):
|
||||
return self.buf
|
||||
def addbyte(self, c):
|
||||
if len(c) != 1: raise TypeError, 'one character expected'
|
||||
self.buf = self.buf + c
|
||||
def addbytes(self, bytes):
|
||||
self.buf = self.buf + bytes
|
||||
def add16bit(self, n):
|
||||
self.buf = self.buf + pack16bit(n)
|
||||
def add32bit(self, n):
|
||||
self.buf = self.buf + pack32bit(n)
|
||||
def addaddr(self, addr):
|
||||
n = addr2bin(addr)
|
||||
self.buf = self.buf + pack32bit(n)
|
||||
def addstring(self, s):
|
||||
self.addbyte(chr(len(s)))
|
||||
self.addbytes(s)
|
||||
def addname(self, name):
|
||||
# Domain name packing (section 4.1.4)
|
||||
# Add a domain name to the buffer, possibly using pointers.
|
||||
# The case of the first occurrence of a name is preserved.
|
||||
# Redundant dots are ignored.
|
||||
list = []
|
||||
for label in string.splitfields(name, '.'):
|
||||
if label:
|
||||
if len(label) > 63:
|
||||
raise PackError, 'label too long'
|
||||
list.append(label)
|
||||
keys = []
|
||||
for i in range(len(list)):
|
||||
key = string.upper(string.joinfields(list[i:], '.'))
|
||||
keys.append(key)
|
||||
if self.index.has_key(key):
|
||||
pointer = self.index[key]
|
||||
break
|
||||
else:
|
||||
i = len(list)
|
||||
pointer = None
|
||||
# Do it into temporaries first so exceptions don't
|
||||
# mess up self.index and self.buf
|
||||
buf = ''
|
||||
offset = len(self.buf)
|
||||
index = []
|
||||
for j in range(i):
|
||||
label = list[j]
|
||||
n = len(label)
|
||||
if offset + len(buf) < 0x3FFF:
|
||||
index.append(keys[j], offset + len(buf))
|
||||
else:
|
||||
print 'DNS.Lib.Packer.addname:',
|
||||
print 'warning: pointer too big'
|
||||
buf = buf + (chr(n) + label)
|
||||
if pointer:
|
||||
buf = buf + pack16bit(pointer | 0xC000)
|
||||
else:
|
||||
buf = buf + '\0'
|
||||
self.buf = self.buf + buf
|
||||
for key, value in index:
|
||||
self.index[key] = value
|
||||
def dump(self):
|
||||
keys = self.index.keys()
|
||||
keys.sort()
|
||||
print '-'*40
|
||||
for key in keys:
|
||||
print '%20s %3d' % (key, self.index[key])
|
||||
print '-'*40
|
||||
space = 1
|
||||
for i in range(0, len(self.buf)+1, 2):
|
||||
if self.buf[i:i+2] == '**':
|
||||
if not space: print
|
||||
space = 1
|
||||
continue
|
||||
space = 0
|
||||
print '%4d' % i,
|
||||
for c in self.buf[i:i+2]:
|
||||
if ' ' < c < '\177':
|
||||
print ' %c' % c,
|
||||
else:
|
||||
print '%2d' % ord(c),
|
||||
print
|
||||
print '-'*40
|
||||
|
||||
|
||||
# Unpacking class
|
||||
|
||||
UnpackError = 'DNS.Lib.UnpackError' # Exception
|
||||
|
||||
class Unpacker:
|
||||
def __init__(self, buf):
|
||||
self.buf = buf
|
||||
self.offset = 0
|
||||
def getbyte(self):
|
||||
c = self.buf[self.offset]
|
||||
self.offset = self.offset + 1
|
||||
return c
|
||||
def getbytes(self, n):
|
||||
s = self.buf[self.offset : self.offset + n]
|
||||
if len(s) != n: raise UnpackError, 'not enough data left'
|
||||
self.offset = self.offset + n
|
||||
return s
|
||||
def get16bit(self):
|
||||
return unpack16bit(self.getbytes(2))
|
||||
def get32bit(self):
|
||||
return unpack32bit(self.getbytes(4))
|
||||
def getaddr(self):
|
||||
return bin2addr(self.get32bit())
|
||||
def getstring(self):
|
||||
return self.getbytes(ord(self.getbyte()))
|
||||
def getname(self):
|
||||
# Domain name unpacking (section 4.1.4)
|
||||
c = self.getbyte()
|
||||
i = ord(c)
|
||||
if i & 0xC0 == 0xC0:
|
||||
d = self.getbyte()
|
||||
j = ord(d)
|
||||
pointer = ((i<<8) | j) & ~0xC000
|
||||
save_offset = self.offset
|
||||
try:
|
||||
self.offset = pointer
|
||||
domain = self.getname()
|
||||
finally:
|
||||
self.offset = save_offset
|
||||
return domain
|
||||
if i == 0:
|
||||
return ''
|
||||
domain = self.getbytes(i)
|
||||
remains = self.getname()
|
||||
if not remains:
|
||||
return domain
|
||||
else:
|
||||
return domain + '.' + remains
|
||||
|
||||
|
||||
# Test program for packin/unpacking (section 4.1.4)
|
||||
|
||||
def testpacker():
|
||||
N = 25
|
||||
R = range(N)
|
||||
import timing
|
||||
# See section 4.1.4 of RFC 1035
|
||||
timing.start()
|
||||
for i in R:
|
||||
p = Packer()
|
||||
p.addbytes('*' * 20)
|
||||
p.addname('f.ISI.ARPA')
|
||||
p.addbytes('*' * 8)
|
||||
p.addname('Foo.F.isi.arpa')
|
||||
p.addbytes('*' * 18)
|
||||
p.addname('arpa')
|
||||
p.addbytes('*' * 26)
|
||||
p.addname('')
|
||||
timing.finish()
|
||||
print round(timing.milli() * 0.001 / N, 3), 'seconds per packing'
|
||||
p.dump()
|
||||
u = Unpacker(p.buf)
|
||||
u.getbytes(20)
|
||||
u.getname()
|
||||
u.getbytes(8)
|
||||
u.getname()
|
||||
u.getbytes(18)
|
||||
u.getname()
|
||||
u.getbytes(26)
|
||||
u.getname()
|
||||
timing.start()
|
||||
for i in R:
|
||||
u = Unpacker(p.buf)
|
||||
res = (u.getbytes(20),
|
||||
u.getname(),
|
||||
u.getbytes(8),
|
||||
u.getname(),
|
||||
u.getbytes(18),
|
||||
u.getname(),
|
||||
u.getbytes(26),
|
||||
u.getname())
|
||||
timing.finish()
|
||||
print round(timing.milli() * 0.001 / N, 3), 'seconds per unpacking'
|
||||
for item in res: print item
|
||||
|
||||
|
||||
# Pack/unpack RR toplevel format (section 3.2.1)
|
||||
|
||||
class RRpacker(Packer):
|
||||
def __init__(self):
|
||||
Packer.__init__(self)
|
||||
self.rdstart = None
|
||||
def addRRheader(self, name, type, klass, ttl, *rest):
|
||||
self.addname(name)
|
||||
self.add16bit(type)
|
||||
self.add16bit(klass)
|
||||
self.add32bit(ttl)
|
||||
if rest:
|
||||
if res[1:]: raise TypeError, 'too many args'
|
||||
rdlength = rest[0]
|
||||
else:
|
||||
rdlength = 0
|
||||
self.add16bit(rdlength)
|
||||
self.rdstart = len(self.buf)
|
||||
def patchrdlength(self):
|
||||
rdlength = unpack16bit(self.buf[self.rdstart-2:self.rdstart])
|
||||
if rdlength == len(self.buf) - self.rdstart:
|
||||
return
|
||||
rdata = self.buf[self.rdstart:]
|
||||
save_buf = self.buf
|
||||
ok = 0
|
||||
try:
|
||||
self.buf = self.buf[:self.rdstart-2]
|
||||
self.add16bit(len(rdata))
|
||||
self.buf = self.buf + rdata
|
||||
ok = 1
|
||||
finally:
|
||||
if not ok: self.buf = save_buf
|
||||
def endRR(self):
|
||||
if self.rdstart is not None:
|
||||
self.patchrdlength()
|
||||
self.rdstart = None
|
||||
def getbuf(self):
|
||||
if self.rdstart is not None: self.patchrdlenth()
|
||||
return Packer.getbuf(self)
|
||||
# Standard RRs (section 3.3)
|
||||
def addCNAME(self, name, klass, ttl, cname):
|
||||
self.addRRheader(name, DNS.Type.CNAME, klass, ttl)
|
||||
self.addname(cname)
|
||||
self.endRR()
|
||||
def addHINFO(self, name, klass, ttl, cpu, os):
|
||||
self.addRRheader(name, DNS.Type.HINFO, klass, ttl)
|
||||
self.addstring(cpu)
|
||||
self.addstring(os)
|
||||
self.endRR()
|
||||
def addMX(self, name, klass, ttl, preference, exchange):
|
||||
self.addRRheader(name, DNS.Type.MX, klass, ttl)
|
||||
self.add16bit(preference)
|
||||
self.addname(exchange)
|
||||
self.endRR()
|
||||
def addNS(self, name, klass, ttl, nsdname):
|
||||
self.addRRheader(name, DNS.Type.NS, klass, ttl)
|
||||
self.addname(nsdname)
|
||||
self.endRR()
|
||||
def addPTR(self, name, klass, ttl, ptrdname):
|
||||
self.addRRheader(name, DNS.Type.PTR, klass, ttl)
|
||||
self.addname(ptrdname)
|
||||
self.endRR()
|
||||
def addSOA(self, name, klass, ttl,
|
||||
mname, rname, serial, refresh, retry, expire, minimum):
|
||||
self.addRRheader(name, DNS.Type.SOA, klass, ttl)
|
||||
self.addname(mname)
|
||||
self.addname(rname)
|
||||
self.add32bit(serial)
|
||||
self.add32bit(refresh)
|
||||
self.add32bit(retry)
|
||||
self.add32bit(expire)
|
||||
self.add32bit(minimum)
|
||||
self.endRR()
|
||||
def addTXT(self, name, klass, ttl, list):
|
||||
self.addRRheader(name, DNS.Type.TXT, klass, ttl)
|
||||
for txtdata in list:
|
||||
self.addstring(txtdata)
|
||||
self.endRR()
|
||||
# Internet specific RRs (section 3.4) -- class = IN
|
||||
def addA(self, name, ttl, address):
|
||||
self.addRRheader(name, DNS.Type.A, DNS.Class.IN, ttl)
|
||||
self.addaddr(address)
|
||||
self.endRR()
|
||||
def addWKS(self, name, ttl, address, protocol, bitmap):
|
||||
self.addRRheader(name, DNS.Type.WKS, DNS.Class.IN, ttl)
|
||||
self.addaddr(address)
|
||||
self.addbyte(chr(protocol))
|
||||
self.addbytes(bitmap)
|
||||
self.endRR()
|
||||
|
||||
def prettyTime(seconds):
|
||||
if seconds<60:
|
||||
return seconds,"%d seconds"%(seconds)
|
||||
if seconds<3600:
|
||||
return seconds,"%d minutes"%(seconds/60)
|
||||
if seconds<86400:
|
||||
return seconds,"%d hours"%(seconds/3600)
|
||||
if seconds<604800:
|
||||
return seconds,"%d days"%(seconds/86400)
|
||||
else:
|
||||
return seconds,"%d weeks"%(seconds/604800)
|
||||
|
||||
|
||||
class RRunpacker(Unpacker):
|
||||
def __init__(self, buf):
|
||||
Unpacker.__init__(self, buf)
|
||||
self.rdend = None
|
||||
def getRRheader(self):
|
||||
name = self.getname()
|
||||
type = self.get16bit()
|
||||
klass = self.get16bit()
|
||||
ttl = self.get32bit()
|
||||
rdlength = self.get16bit()
|
||||
self.rdend = self.offset + rdlength
|
||||
return (name, type, klass, ttl, rdlength)
|
||||
def endRR(self):
|
||||
if self.offset != self.rdend:
|
||||
raise UnpackError, 'end of RR not reached'
|
||||
def getCNAMEdata(self):
|
||||
return self.getname()
|
||||
def getHINFOdata(self):
|
||||
return self.getstring(), self.getstring()
|
||||
def getMXdata(self):
|
||||
return self.get16bit(), self.getname()
|
||||
def getNSdata(self):
|
||||
return self.getname()
|
||||
def getPTRdata(self):
|
||||
return self.getname()
|
||||
def getSOAdata(self):
|
||||
return self.getname(), \
|
||||
self.getname(), \
|
||||
('serial',)+(self.get32bit(),), \
|
||||
('refresh ',)+prettyTime(self.get32bit()), \
|
||||
('retry',)+prettyTime(self.get32bit()), \
|
||||
('expire',)+prettyTime(self.get32bit()), \
|
||||
('minimum',)+prettyTime(self.get32bit())
|
||||
def getTXTdata(self):
|
||||
list = []
|
||||
while self.offset != self.rdend:
|
||||
list.append(self.getstring())
|
||||
return list
|
||||
def getAdata(self):
|
||||
return self.getaddr()
|
||||
def getWKSdata(self):
|
||||
address = self.getaddr()
|
||||
protocol = ord(self.getbyte())
|
||||
bitmap = self.getbytes(self.rdend - self.offset)
|
||||
return address, protocol, bitmap
|
||||
|
||||
|
||||
# Pack/unpack Message Header (section 4.1)
|
||||
|
||||
class Hpacker(Packer):
|
||||
def addHeader(self, id, qr, opcode, aa, tc, rd, ra, z, rcode,
|
||||
qdcount, ancount, nscount, arcount):
|
||||
self.add16bit(id)
|
||||
self.add16bit((qr&1)<<15 | (opcode*0xF)<<11 | (aa&1)<<10
|
||||
| (tc&1)<<9 | (rd&1)<<8 | (ra&1)<<7
|
||||
| (z&7)<<4 | (rcode&0xF))
|
||||
self.add16bit(qdcount)
|
||||
self.add16bit(ancount)
|
||||
self.add16bit(nscount)
|
||||
self.add16bit(arcount)
|
||||
|
||||
class Hunpacker(Unpacker):
|
||||
def getHeader(self):
|
||||
id = self.get16bit()
|
||||
flags = self.get16bit()
|
||||
qr, opcode, aa, tc, rd, ra, z, rcode = (
|
||||
(flags>>15)&1,
|
||||
(flags>>11)&0xF,
|
||||
(flags>>10)&1,
|
||||
(flags>>9)&1,
|
||||
(flags>>8)&1,
|
||||
(flags>>7)&1,
|
||||
(flags>>4)&7,
|
||||
(flags>>0)&0xF)
|
||||
qdcount = self.get16bit()
|
||||
ancount = self.get16bit()
|
||||
nscount = self.get16bit()
|
||||
arcount = self.get16bit()
|
||||
return (id, qr, opcode, aa, tc, rd, ra, z, rcode,
|
||||
qdcount, ancount, nscount, arcount)
|
||||
|
||||
|
||||
# Pack/unpack Question (section 4.1.2)
|
||||
|
||||
class Qpacker(Packer):
|
||||
def addQuestion(self, qname, qtype, qclass):
|
||||
self.addname(qname)
|
||||
self.add16bit(qtype)
|
||||
self.add16bit(qclass)
|
||||
|
||||
class Qunpacker(Unpacker):
|
||||
def getQuestion(self):
|
||||
return self.getname(), self.get16bit(), self.get16bit()
|
||||
|
||||
|
||||
# Pack/unpack Message(section 4)
|
||||
# NB the order of the base classes is important for __init__()!
|
||||
|
||||
class Mpacker(RRpacker, Qpacker, Hpacker):
|
||||
pass
|
||||
|
||||
class Munpacker(RRunpacker, Qunpacker, Hunpacker):
|
||||
pass
|
||||
|
||||
|
||||
# Routines to print an unpacker to stdout, for debugging.
|
||||
# These affect the unpacker's current position!
|
||||
|
||||
def dumpM(u):
|
||||
print 'HEADER:',
|
||||
(id, qr, opcode, aa, tc, rd, ra, z, rcode,
|
||||
qdcount, ancount, nscount, arcount) = u.getHeader()
|
||||
print 'id=%d,' % id,
|
||||
print 'qr=%d, opcode=%d, aa=%d, tc=%d, rd=%d, ra=%d, z=%d, rcode=%d,' \
|
||||
% (qr, opcode, aa, tc, rd, ra, z, rcode)
|
||||
if tc: print '*** response truncated! ***'
|
||||
if rcode: print '*** nonzero error code! (%d) ***' % rcode
|
||||
print ' qdcount=%d, ancount=%d, nscount=%d, arcount=%d' \
|
||||
% (qdcount, ancount, nscount, arcount)
|
||||
for i in range(qdcount):
|
||||
print 'QUESTION %d:' % i,
|
||||
dumpQ(u)
|
||||
for i in range(ancount):
|
||||
print 'ANSWER %d:' % i,
|
||||
dumpRR(u)
|
||||
for i in range(nscount):
|
||||
print 'AUTHORITY RECORD %d:' % i,
|
||||
dumpRR(u)
|
||||
for i in range(arcount):
|
||||
print 'ADDITIONAL RECORD %d:' % i,
|
||||
dumpRR(u)
|
||||
|
||||
class DnsResult:
|
||||
|
||||
def __init__(self,u,args):
|
||||
self.header={}
|
||||
self.questions=[]
|
||||
self.answers=[]
|
||||
self.authority=[]
|
||||
self.additional=[]
|
||||
self.args=args
|
||||
self.storeM(u)
|
||||
|
||||
def show(self):
|
||||
import time
|
||||
print '; <<>> PDG.py 1.0 <<>> %s %s'%(self.args['name'],
|
||||
self.args['qtype'])
|
||||
opt=""
|
||||
if self.args['rd']:
|
||||
opt=opt+'recurs '
|
||||
h=self.header
|
||||
print ';; options: '+opt
|
||||
print ';; got answer:'
|
||||
print ';; ->>HEADER<<- opcode %s, status %s, id %d'%(
|
||||
h['opcode'],h['status'],h['id'])
|
||||
flags=filter(lambda x,h=h:h[x],('qr','aa','rd','ra','tc'))
|
||||
print ';; flags: %s; Ques: %d, Ans: %d, Auth: %d, Addit: %d'%(
|
||||
string.join(flags),h['qdcount'],h['ancount'],h['nscount'],
|
||||
h['arcount'])
|
||||
print ';; QUESTIONS:'
|
||||
for q in self.questions:
|
||||
print ';; %s, type = %s, class = %s'%(q['qname'],q['qtypestr'],
|
||||
q['qclassstr'])
|
||||
print
|
||||
print ';; ANSWERS:'
|
||||
for a in self.answers:
|
||||
print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'],
|
||||
a['data'])
|
||||
print
|
||||
print ';; AUTHORITY RECORDS:'
|
||||
for a in self.authority:
|
||||
print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'],
|
||||
a['data'])
|
||||
print
|
||||
print ';; ADDITIONAL RECORDS:'
|
||||
for a in self.additional:
|
||||
print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'],
|
||||
a['data'])
|
||||
print
|
||||
if self.args.has_key('elapsed'):
|
||||
print ';; Total query time: %d msec'%self.args['elapsed']
|
||||
print ';; To SERVER: %s'%(self.args['server'])
|
||||
print ';; WHEN: %s'%time.ctime(time.time())
|
||||
|
||||
def storeM(self,u):
|
||||
(self.header['id'], self.header['qr'], self.header['opcode'],
|
||||
self.header['aa'], self.header['tc'], self.header['rd'],
|
||||
self.header['ra'], self.header['z'], self.header['rcode'],
|
||||
self.header['qdcount'], self.header['ancount'],
|
||||
self.header['nscount'], self.header['arcount']) = u.getHeader()
|
||||
self.header['opcodestr']=DNS.Opcode.opcodestr(self.header['opcode'])
|
||||
self.header['status']=DNS.Status.statusstr(self.header['rcode'])
|
||||
for i in range(self.header['qdcount']):
|
||||
#print 'QUESTION %d:' % i,
|
||||
self.questions.append(self.storeQ(u))
|
||||
for i in range(self.header['ancount']):
|
||||
#print 'ANSWER %d:' % i,
|
||||
self.answers.append(self.storeRR(u))
|
||||
for i in range(self.header['nscount']):
|
||||
#print 'AUTHORITY RECORD %d:' % i,
|
||||
self.authority.append(self.storeRR(u))
|
||||
for i in range(self.header['arcount']):
|
||||
#print 'ADDITIONAL RECORD %d:' % i,
|
||||
self.additional.append(self.storeRR(u))
|
||||
|
||||
def storeQ(self,u):
|
||||
q={}
|
||||
q['qname'], q['qtype'], q['qclass'] = u.getQuestion()
|
||||
q['qtypestr']=DNS.Type.typestr(q['qtype'])
|
||||
q['qclassstr']=DNS.Class.classstr(q['qclass'])
|
||||
return q
|
||||
|
||||
def storeRR(self,u):
|
||||
r={}
|
||||
r['name'],r['type'],r['class'],r['ttl'],r['rdlength'] = u.getRRheader()
|
||||
r['typename'] = DNS.Type.typestr(r['type'])
|
||||
r['classstr'] = DNS.Class.classstr(r['class'])
|
||||
#print 'name=%s, type=%d(%s), class=%d(%s), ttl=%d' \
|
||||
# % (name,
|
||||
# type, typename,
|
||||
# klass, DNS.Class.classstr(class),
|
||||
# ttl)
|
||||
mname = 'get%sdata' % r['typename']
|
||||
if hasattr(u, mname):
|
||||
r['data']=getattr(u, mname)()
|
||||
else:
|
||||
r['data']=u.getbytes(rdlength)
|
||||
return r
|
||||
|
||||
def dumpQ(u):
|
||||
qname, qtype, qclass = u.getQuestion()
|
||||
print 'qname=%s, qtype=%d(%s), qclass=%d(%s)' \
|
||||
% (qname,
|
||||
qtype, DNS.Type.typestr(qtype),
|
||||
qclass, DNS.Class.classstr(qclass))
|
||||
|
||||
def dumpRR(u):
|
||||
name, type, klass, ttl, rdlength = u.getRRheader()
|
||||
typename = DNS.Type.typestr(type)
|
||||
print 'name=%s, type=%d(%s), class=%d(%s), ttl=%d' \
|
||||
% (name,
|
||||
type, typename,
|
||||
klass, DNS.Class.classstr(klass),
|
||||
ttl)
|
||||
mname = 'get%sdata' % typename
|
||||
if hasattr(u, mname):
|
||||
print ' formatted rdata:', getattr(u, mname)()
|
||||
else:
|
||||
print ' binary rdata:', u.getbytes(rdlength)
|
||||
|
||||
16
DNS/Opcode.py
Normal file
16
DNS/Opcode.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Opcode values in message header (section 4.1.1)

QUERY = 0
IQUERY = 1
STATUS = 2

# Construct reverse mapping dictionary (numeric opcode -> symbolic name)
# from the public module-level names defined above.

_names = dir()
opcodemap = {}
for _name in _names:
    if _name[0] != '_':
        # globals() lookup replaces the original eval() on the name.
        opcodemap[globals()[_name]] = _name

def opcodestr(opcode):
    """Return the symbolic name for *opcode* (e.g. 0 -> 'QUERY'),
    or the repr of the value if it is unknown."""
    # dict.get replaces the Python2-only has_key()/backtick-repr idioms.
    return opcodemap.get(opcode, repr(opcode))
|
||||
19
DNS/Status.py
Normal file
19
DNS/Status.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Status values in message header

NOERROR = 0
FORMERR = 1
SERVFAIL = 2
NXDOMAIN = 3
NOTIMP = 4
REFUSED = 5

# Construct reverse mapping dictionary (numeric status -> symbolic name)
# from the public module-level names defined above.

_names = dir()
statusmap = {}
for _name in _names:
    if _name[0] != '_':
        # globals() lookup replaces the original eval() on the name.
        statusmap[globals()[_name]] = _name

def statusstr(status):
    """Return the symbolic name for *status* (e.g. 3 -> 'NXDOMAIN'),
    or the repr of the value if it is unknown."""
    # dict.get replaces the Python2-only has_key()/backtick-repr idioms.
    return statusmap.get(status, repr(status))
|
||||
42
DNS/Type.py
Normal file
42
DNS/Type.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# TYPE values (section 3.2.2)

A = 1           # a host address
NS = 2          # an authoritative name server
MD = 3          # a mail destination (Obsolete - use MX)
MF = 4          # a mail forwarder (Obsolete - use MX)
CNAME = 5       # the canonical name for an alias
SOA = 6         # marks the start of a zone of authority
MB = 7          # a mailbox domain name (EXPERIMENTAL)
MG = 8          # a mail group member (EXPERIMENTAL)
MR = 9          # a mail rename domain name (EXPERIMENTAL)
NULL = 10       # a null RR (EXPERIMENTAL)
WKS = 11        # a well known service description
PTR = 12        # a domain name pointer
HINFO = 13      # host information
MINFO = 14      # mailbox or mail list information
MX = 15         # mail exchange
TXT = 16        # text strings
AAAA = 28       # IPv6 AAAA records (RFC 1886)

# Additional TYPE values from host.c source

UNAME = 110
MP = 240

# QTYPE values (section 3.2.3)

AXFR = 252      # A request for a transfer of an entire zone
MAILB = 253     # A request for mailbox-related records (MB, MG or MR)
MAILA = 254     # A request for mail agent RRs (Obsolete - see MX)
ANY = 255       # A request for all records

# Construct reverse mapping dictionary (numeric type -> symbolic name)
# from the public module-level names defined above.

_names = dir()
typemap = {}
for _name in _names:
    if _name[0] != '_':
        # globals() lookup replaces the original eval() on the name.
        typemap[globals()[_name]] = _name

def typestr(type):
    """Return the symbolic name for *type* (e.g. 1 -> 'A'), or the
    repr of the value if it is unknown.

    The parameter shadows the builtin `type`; the name is kept for
    backward compatibility with existing callers.
    """
    # dict.get replaces the Python2-only has_key()/backtick-repr idioms.
    return typemap.get(type, repr(type))
|
||||
10
DNS/__init__.py
Normal file
10
DNS/__init__.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# __init__.py for DNS class.

# Package-level error marker (old-style string exception, Python 1.5 era).
Error='DNS API error'
# Constant tables for DNS record types, opcodes, statuses and classes
# (implicit relative imports; left untouched).
import Type,Opcode,Status,Class
# Pull the request/result machinery and the convenience helpers into the
# package namespace.
from Base import *
from Lib import *
from lazy import *
# Friendlier aliases for the main entry points.
Request = DnsRequest
Result = DnsResult
|
||||
|
||||
266
DNS/asyncore.py
Normal file
266
DNS/asyncore.py
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
# -*- Mode: Python; tab-width: 4 -*-
# $Id$
# Author: Sam Rushing <rushing@nightmare.com>

# A simple unix version of the asynchronous socket support.
# There are lots of problems with this still - I only wrote it to show
# that it could be done, and for my own testing purposes.
# [960206: servtest, asynfing, asynhttp, and pop3demo work, asyndns doesn't.]
# [960321: servtest, asynfing, asynhttp, pop3demo, pop3_2 work]
import select
import socket
import sys

# you need to generate ERRNO.py from Tools/scripts/h2py.py in the Python
# distribution.

try:
    import ERRNO
except ImportError:
    # Python 2 raise syntax; left untouched.
    raise ImportError,'you need to generate ERRNO.py from Tools/scripts/h2py.py in the Python distribution'

# look what I can get away with... 8^)
# Global fd -> dispatcher registry, stashed on the socket module itself
# so every dispatcher instance shares it.
socket.socket_map = {}

ALL_EVENTS = []

# Default select() timeout in seconds for loop().
DEFAULT_TIMEOUT = 30.0

# Flag consulted by dispatcher.go() to avoid starting a second loop.
loop_running = 0

# Old-style string exception used to break out of the select loop
# when the last channel is removed from the map.
stop_loop_exception = "stop running the select loop"
|
||||
|
||||
# we want to select for read only those sockets
|
||||
# to which we are already connected to, -OR- those
|
||||
# sockets we are accepting on.
|
||||
def readables(sock_fds):
    """Select the fds from *sock_fds* that should be polled for reading.

    A channel is read-eligible once it is connected, or while it is
    accepting new connections.
    """
    sm = socket.socket_map
    return [fd for fd in sock_fds
            if sm[fd].connected or sm[fd].accepting]
|
||||
|
||||
# only those fd's we are 'write blocked' on, -OR-
|
||||
# those sockets we are waiting for a connection on.
|
||||
def writables(sock_fds):
    """Select the fds from *sock_fds* that should be polled for writing.

    A channel is write-eligible while it has pending output
    (write_blocked) or while its connect is still in progress.
    """
    sm = socket.socket_map
    return [fd for fd in sock_fds
            if sm[fd].write_blocked or not sm[fd].connected]
|
||||
|
||||
def loop(timeout=DEFAULT_TIMEOUT):
|
||||
loop_running = 1
|
||||
try:
|
||||
while 1:
|
||||
sock_fds = socket.socket_map.keys()
|
||||
|
||||
read_fds = readables (sock_fds)
|
||||
write_fds = writables (sock_fds)
|
||||
expt_fds = sock_fds[:]
|
||||
|
||||
(read_fds,
|
||||
write_fds,
|
||||
expt_fds) = select.select (read_fds,
|
||||
write_fds,
|
||||
expt_fds,
|
||||
timeout)
|
||||
print read_fds,write_fds,expt_fds
|
||||
try:
|
||||
for x in expt_fds:
|
||||
socket.socket_map[x].handle_expt_event()
|
||||
for x in read_fds:
|
||||
socket.socket_map[x].handle_read_event()
|
||||
for x in write_fds:
|
||||
socket.socket_map[x].handle_write_event()
|
||||
except KeyError:
|
||||
# handle_expt handle_read might remove as socket
|
||||
# from the map by calling self.close().
|
||||
pass
|
||||
except stop_loop_exception:
|
||||
print 'loop stopped'
|
||||
|
||||
class dispatcher:
    # Base class wrapping a non-blocking socket registered in the global
    # socket.socket_map; subclasses override the handle_* callbacks.

    def __init__ (self, sock=None):
        self.debug = 0
        self.log_queue = []
        self.connected = 0
        self.accepting = 0
        # Start write-blocked so the first write event calls handle_write.
        self.write_blocked = 1
        if sock:
            # Wrap an already-open socket and register it immediately.
            self.socket = sock
            self.fileno = self.socket.fileno()
            # I think it should inherit this anyway
            self.socket.setblocking (0)
            self.connected = 1
            self.add_channel()

    def add_channel (self, events=ALL_EVENTS):
        # Register this channel under its fd in the global map.
        self.log ('adding channel %s' % self)
        socket.socket_map [self.fileno] = self

    def del_channel (self):
        # Unregister; stop the select loop once the map goes empty.
        if socket.socket_map.has_key (self.fileno):
            del socket.socket_map [self.fileno]
        if not len(socket.socket_map.keys()):
            raise stop_loop_exception

    def create_socket (self, family, type):
        # Create a fresh non-blocking socket and register it.
        self.socket = socket.socket (family, type)
        self.socket.setblocking(0)
        self.fileno = self.socket.fileno()
        self.add_channel()

    def bind (self, *args):
        return apply (self.socket.bind, args)

    def go (self):
        # Convenience: start the global select loop if not yet running.
        if not loop_running:
            loop()

    def listen (self, num):
        self.accepting = 1
        self.socket.listen (num)

    def accept (self):
        return self.socket.accept()

    def connect (self, host, port):
        # Non-blocking connect: "in progress" errnos are not failures.
        # (Python 2 `except ..., var` / `raise cls, val` syntax kept.)
        try:
            self.socket.connect (host, port)
        except socket.error, why:
            if type(why) == type(()) \
               and why[0] in (ERRNO.EINPROGRESS, ERRNO.EALREADY, ERRNO.EWOULDBLOCK):
                return
            else:
                raise socket.error, why
        self.connected = 1
        self.handle_connect()

    def send (self, data):
        # Returns the number of bytes actually sent; sets write_blocked
        # when the kernel buffer could not take everything (or at all).
        try:
            result = self.socket.send (data)
            if result != len(data):
                self.write_blocked = 1
            else:
                self.write_blocked = 0
            return result
        except socket.error, why:
            if type(why) == type(()) and why[0] == ERRNO.EWOULDBLOCK:
                self.write_blocked = 1
                return 0
            else:
                raise socket.error, why
            return 0

    def recv (self, buffer_size):
        data = self.socket.recv (buffer_size)
        if not data:
            # Remote close is signalled by an empty read.
            self.handle_close()
            return ''
        else:
            return data

    def close (self):
        self.socket.close()
        self.del_channel()

    def shutdown (self, how):
        self.socket.shutdown (how)

    def log (self, message):
        #self.log_queue.append ('%s:%d %s' %
        #	(self.__class__.__name__, self.fileno, message))
        print 'log:', message

    def done (self):
        self.print_log()

    def print_log (self):
        for x in self.log_queue:
            print x

    def handle_read_event (self):
        # getting a read implies that we are connected
        if not self.connected:
            self.handle_connect()
            self.connected = 1
            self.handle_read()
        elif self.accepting:
            if not self.connected:
                self.connected = 1
            self.handle_accept()
        else:
            self.handle_read()

    def more_to_send (self, yesno=1):
        # Mark the channel as having pending output for writables().
        self.write_blocked = yesno

    def handle_write_event (self):
        # getting a read implies that we are connected
        if not self.connected:
            self.handle_connect()
            self.connected = 1
        self.write_blocked = 0
        self.handle_write()

    def handle_expt_event (self):
        self.handle_error()

    def handle_error (self, error=0):
        self.close()

    # Default event handlers: subclasses override the ones they need.
    def handle_read (self):
        self.log ('unhandled FD_READ')

    def handle_write (self):
        self.log ('unhandled FD_WRITE')

    def handle_connect (self):
        self.log ('unhandled FD_CONNECT')

    def handle_oob (self):
        self.log ('unhandled FD_OOB')

    def handle_accept (self):
        self.log ('unhandled FD_ACCEPT')

    def handle_close (self):
        self.log ('unhandled FD_CLOSE')

    def handle_disconnect (self, error):
        self.log ('unexpected disconnect, error:%d' % error)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# adds async send capability, useful for simple clients.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class dispatcher_with_send (dispatcher):
    # dispatcher that buffers outgoing data and drains it on write events.

    def __init__ (self, sock=None):
        dispatcher.__init__ (self, sock)
        self.out_buffer = ''

    def initiate_send (self):
        # Push the buffer out in 512-byte slices until it is empty or the
        # socket would block (dispatcher.send returns 0 and sets
        # write_blocked).
        # BUG FIX: the original looped unconditionally, so a 0-byte send
        # (EWOULDBLOCK) left out_buffer unchanged and spun forever.
        while self.out_buffer:
            num_sent = dispatcher.send (self, self.out_buffer[:512])
            if not num_sent:
                # would block; wait for the next write event
                break
            self.out_buffer = self.out_buffer[num_sent:]

    def handle_write (self):
        self.initiate_send()

    def send (self, data):
        # Queue *data* and try to flush it immediately.
        if self.debug:
            self.log ('sending %s' % repr(data))
        # BUG FIX: append to the buffer instead of overwriting it, so
        # data queued while the socket was blocked is not lost.
        self.out_buffer = self.out_buffer + data
        self.initiate_send()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# used a lot when debugging
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def close_all():
    """Close every registered channel's socket and reset the global map."""
    for channel in socket.socket_map.values():
        channel.socket.close()
    socket.socket_map = {}
|
||||
|
||||
24
DNS/lazy.py
Normal file
24
DNS/lazy.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# $Id$
|
||||
# routines for lazy people.
|
||||
import Base
|
||||
|
||||
def revlookup(name):
    """Convenience helper: reverse-resolve dotted-quad address *name*.

    Builds the in-addr.arpa query name and returns the data of the
    first PTR answer only.
    """
    # string-module functions kept: the package targets Python 1.5.2,
    # which has no string methods.
    import string
    labels = string.split(name, '.')
    labels.reverse()
    arpa = string.join(labels, '.') + '.in-addr.arpa'
    # this will only return one of any records returned.
    return Base.DnsRequest(arpa, qtype='ptr').req().answers[0]['data']
|
||||
|
||||
def mxlookup(name):
    """Convenience helper: MX lookup of *name*.

    Returns the (preference, mail exchanger) pairs from the answer
    section, sorted so the most-preferred exchanger comes first.
    """
    answers = Base.DnsRequest(name, qtype='mx').req().answers
    # map/lambda kept for Python 1.5.2 compatibility (no list comps).
    records = map(lambda rr: rr['data'], answers)
    records.sort()
    return records
|
||||
|
||||
43
GML/GMLLexer.py
Normal file
43
GML/GMLLexer.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import sys,re
|
||||
import PyLR
|
||||
|
||||
def _intfunc(m):
|
||||
return int(m.group(0))
|
||||
|
||||
def _realfunc(m):
|
||||
return float(m.group(0))
|
||||
|
||||
class GMLLexer(PyLR.Lexer):
    """The GML lexical scanner."""
    def __init__(self):
        PyLR.Lexer.__init__(self)
        # NOTE(review): REAL is registered before INT - presumably PyLR
        # tries patterns in registration order so "1.5" is one REAL
        # token; confirm against PyLR.Lexer.
        self.addpat(r"[-+]?(\d+\.\d*|\d*\.\d+)([Ee][-+]?\d+)?",
                    "REAL", _realfunc)
        self.addpat(r"[-+]?\d+", "INT", _intfunc)
        self.addpat(r"\[", "LSQB")
        self.addpat(r"\]", "RSQB")
        # GML strings: no raw '&' or '"' inside; entities like &amp; allowed.
        self.addpat(r'"([^&"]+|&[a-zA-Z]+;)*"', "STRING")
        self.addpat(r"[a-zA-Z][a-zA-Z0-9]*", "KEY")
        # '#' comments and whitespace are skipped tokens.
        self.addpat(r"#[^\n]*", "", None, PyLR.SKIPTOK)
        self.addpat(r"\s+", "", None, PyLR.SKIPTOK)
|
||||
|
||||
def _test():
    # Smoke test: scan a small sample graph, exercising reals, ints,
    # strings with an entity, brackets, and both comment positions.
    gmltest = """# a graph example
graph [ # comment at end of line
node [
real1 1.e3
real2 .01
int1 00050
label "Wallerfang&Ballern"
]
]
"""
    # create the lexer
    lexer = GMLLexer()
    lexer.settext(gmltest)
    # Drive the scanner until it reports end of input (falsy token).
    tok=1
    while tok:
        tok, val = lexer.scan(1)

if __name__ == '__main__':
    _test()
|
||||
45
GML/grammarspec.txt
Normal file
45
GML/grammarspec.txt
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# a GML parser
|
||||
# Here is the GML grammar
|
||||
# corrected by me because the original at
|
||||
# http://www.uni-passau.de/Graphlet/GML had some errors
|
||||
#
|
||||
# corrections are
|
||||
# (1) use instring* in string
|
||||
# (2) add character,lowercase,uppercase definitions
|
||||
# (3) skip whitespace definition, this is obvious
|
||||
# (4) use digit+ in mantissa
|
||||
# (5) either intpart or fraction of a real must contain a number
|
||||
# (6) comments can be on a separate or at the end of the line
|
||||
#
|
||||
# gml: list
|
||||
# list: (whitespace* key whitespace+ value)*
|
||||
# value: integer | real | string | "[" list "]"
|
||||
# key: character (character | digit)*
|
||||
# integer: sign digit+
|
||||
# real: sign (digit+ "." digit* | digit* "." digit+) mantissa
|
||||
# string: """ instring* """
|
||||
# sign: "+" | "-" |
|
||||
# digit: "0"..."9"
|
||||
# character: lowercase | uppercase
|
||||
# lowercase: "a"..."z"
|
||||
# uppercase: "A"..."Z"
|
||||
# mantissa: ("E"|"e") sign digit+ |
|
||||
# instring: <ASCII except "&" and """> | "&" character+ ";"
|
||||
#
|
||||
# Note that integers and reals can have prefixed zeros, e.g. 001 is 1
|
||||
|
||||
_class GMLParser
|
||||
_code import GMLLexer
|
||||
_lex GMLLexer.GMLLexer()
|
||||
|
||||
# manually reduced
|
||||
"""
|
||||
list: list KEY value (key_value) |
|
||||
(endoflist) ;
|
||||
value: INTEGER |
|
||||
REAL |
|
||||
STRING |
|
||||
LSQB list RSQB (beginlist) ;
|
||||
"""
|
||||
|
||||
|
||||
28
INSTALL
Normal file
28
INSTALL
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
LinkChecker installation
|
||||
==========================
|
||||
|
||||
First, decompress the archive.
|
||||
With linkchecker-x.x.x.tar.bz2 do "tar xIvf linkchecker-x.x.x.tar.bz2".
|
||||
With linkchecker-x.x.x.zip do "unzip linkchecker-x.x.x.zip" or use Winzip.
|
||||
With linkchecker-x.x.x.deb do "dpkg -i linkchecker-x.x.x.deb" as root and you
|
||||
are done.
|
||||
|
||||
Unix Users:
|
||||
1. Edit the file linkchecker.
|
||||
Adjust the argument to sys.path.append to point to the distribution
|
||||
directory.
|
||||
2. Copy linkchecker to a location in your PATH (or make a symlink).
|
||||
3. Check links happily by typing `linkchecker`.
|
||||
|
||||
Windows Users:
|
||||
1. Edit the file linkchecker.
|
||||
Adjust the argument to sys.path.append to point to the distribution
|
||||
directory.
|
||||
2. Edit the file linkchecker.bat.
|
||||
a) Adjust the PYTHON variable to point to python.exe.
|
||||
b) Adjust the LINKCHECKER variable to point to the distribution directory.
|
||||
3. Add the distribution directory to your PATH.
|
||||
4. Check links happily by typing `linkchecker.bat`.
|
||||
|
||||
You need Python >= 1.5.2
|
||||
You get Python from http://www.python.org
|
||||
339
LICENSE
Normal file
339
LICENSE
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||
675 Mass Ave, Cambridge, MA 02139, USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Library General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) 19yy <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) 19yy name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Library General
|
||||
Public License instead of this License.
|
||||
53
Makefile
Normal file
53
Makefile
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Makefile for building, testing, packaging and installing LinkChecker.

# Release version and the host crawled by the "files" smoke run below.
VERSION=0.9.0
HOST=treasure.calvinsplayground.de
#HOST=fsinfo.cs.uni-sb.de

# Package name and the three distribution archives derived from it.
PACKAGE = linkchecker
BZ2PACKAGE = $(PACKAGE)-$(VERSION).tar.bz2
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
ZIPPACKAGE = $(PACKAGE)-$(VERSION).zip
ALLPACKAGES = ../$(BZ2PACKAGE) ../$(DEBPACKAGE) ../$(ZIPPACKAGE)
.PHONY: test clean files install all
TAR = tar
ZIP = zip
prefix = /usr/local

# Nothing to compile -- the package is pure Python.
all:

# Remove built archives and generated linkchecker output files.
clean:
	rm -f $(ALLPACKAGES) $(PACKAGE)-out.*

# Produce sample output in every supported format by crawling $(HOST).
files: all
	./$(PACKAGE) -q -Wtext -Whtml -Wgml -Wsql -R -r2 -v -i "$(HOST)" http://$(HOST)/~calvin/

# Install sources, executable and config file into $(DESTDIR).
# NOTE(review): "*.py?" globs .pyc/.pyo only (? matches exactly one char),
# not the .py sources -- confirm whether "*.py*" was intended.
install: install-dirs
	install -m644 linkcheck/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/linkcheck
	install -m644 DNS/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/DNS
	install -m644 *.py? $(DESTDIR)/usr/share/$(PACKAGE)
	install -m755 $(PACKAGE) $(DESTDIR)/usr/bin
	install -m644 $(PACKAGE)rc $(DESTDIR)/etc

# Create the target directory tree with standard permissions.
install-dirs:
	install -d -m755 \
	$(DESTDIR)/usr/share/$(PACKAGE)/linkcheck \
	$(DESTDIR)/usr/share/$(PACKAGE)/DNS \
	$(DESTDIR)/usr/share/$(PACKAGE)/GML \
	$(DESTDIR)/usr/share/$(PACKAGE)/PyLR \
	$(DESTDIR)/usr/bin \
	$(DESTDIR)/etc

# Build all release artifacts: bzip2 tarball, zip archive, Debian package.
# NOTE(review): tar's "I" flag meant bzip2 only in old GNU tar releases;
# modern tar uses -j. Left as-is for the era this targets.
dist: files
	dh_clean
	cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE)
	cd .. && $(ZIP) -r $(ZIPPACKAGE) $(PACKAGE)
	fakeroot debian/rules binary

# Build just the bzip2 tarball.
package:
	cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE)

# Run the checker over every test page; output lands next to each input
# as <page>.html.result (stdout and stderr combined).
test:
	rm -f test/*.result
	@for i in test/*.html; do \
	echo "Testing $$i. Results are in $$i.result"; \
	./$(PACKAGE) -v -a $$i > $$i.result 2>&1; \
	done
||||
|
||||
853
PyLR/Grammar.py
Normal file
853
PyLR/Grammar.py
Normal file
|
|
@ -0,0 +1,853 @@
|
|||
__version__ = "$Id$"
|
||||
|
||||
import time,string,types,parsertemplate
|
||||
|
||||
# NOTE(review): "ParseError" is neither defined in this module nor imported
# above (only time, string, types, parsertemplate are) -- presumably it is
# meant to come from parsertemplate or be an Exception subclass; as written
# this raises NameError when the module is imported.  Confirm its origin.
class PyLRParseError(ParseError):
    """Exception raised for grammar construction and parse errors in PyLR."""
    pass
|
||||
|
||||
class Production:
    """One grammar production: an LHS symbol plus a list of RHS symbols.

    A grammar is essentially just a list of these objects.  Each production
    may carry the name of a semantic-action function (funcname) and, later,
    the function object itself (func), attached dynamically.
    """

    def __init__(self, LHS, RHS, funcname="unspecified"):
        self.LHS = LHS
        self.RHS = RHS
        self.funcname = funcname
        # the callable is attached later via setfunc()
        self.func = None
        self.toklist = None

    def setfunc(self, func):
        """Attach the semantic-action callable.  Used when a parse engine is
        produced on the fly by Grammar.mkengine() instead of tables being
        saved to a file."""
        self.func = func

    def setfuncname(self, name):
        """Record the semantic-action function name.  Used by
        Grammar.writefile() when it emits the prodinfo table; setfunc()
        supplies the actual function value for on-the-fly engines."""
        self.funcname = name

    def __len__(self):
        # a production's length is the number of its RHS symbols
        return len(self.RHS)

    def __repr__(self):
        return self.getrep()

    def getrep(self, toklist=None):
        """Render 'LHS: rhs1 rhs2 ... (funcname)'.  Integer RHS entries are
        token numbers and are looked up in toklist when one is supplied."""
        rep = self.LHS + ":"
        for sym in self.RHS:
            if type(sym) == types.IntType and toklist:
                rep = rep + " " + toklist[sym]
            else:
                rep = rep + " " + str(sym)
        if self.funcname:
            rep = rep + " (" + self.funcname + ")"
        return rep

    def items(self):
        """Return every dot position 0..len(RHS) for LR items over this
        production."""
        return range(len(self.RHS) + 1)
|
||||
|
||||
|
||||
class LR1Grammar:
    """Provides methods for producing the actiontable, the gototable, and the
    prodinfo table. Using these functions, it can produce a python source
    code file with these tables or a parsing engine.
    Note that we assume the first production (productions[0]) to be the start
    symbol."""

    # NOTE(review): many methods below reference Grammar.EPS / Grammar.EOF /
    # Grammar.DummyLA, but no name "Grammar" is defined or imported in this
    # module -- this class is named LR1Grammar.  Unless an alias "Grammar"
    # exists elsewhere, those references raise NameError at runtime; confirm.

    EPS = "<EPS>"   # marker symbol for the empty string (epsilon)
    EOF = "<EOF>"   # marker symbol for end of input
    DummyLA = -1    # placeholder lookahead used by the LALR construction

    def __init__(self, productions, tokens=[], verbose=0):
        # NOTE(review): mutable default tokens=[] is shared across calls;
        # harmless only as long as self.tokens is never mutated.
        self.verbose = verbose
        self.productions = productions
        self.tokens = tokens
        # the distinct LHS symbols are the nonterminals
        self.nonterminals = []
        for p in self.productions:
            if p.LHS not in self.nonterminals:
                self.nonterminals.append(p.LHS)
        if self.verbose:
            print "Nonterminals:", self.nonterminals
        # every RHS symbol that is not a nonterminal is a terminal
        self.terminals = []
        for p in self.productions:
            for s in p.RHS:
                if not (s in self.terminals or s in self.nonterminals):
                    self.terminals.append(s)
        self.terminals.sort()
        if self.verbose:
            print "Terminals:", self.terminals
        # reduce the grammar
        self._reduceGrammar()
        # build map with productions who have the same LHS
        self.lhsprods = {}
        for lhs in self.nonterminals:
            self.lhsprods[lhs] = filter(lambda x,l=lhs: x.LHS==l, self.productions)
        # immediate epsilon productions; pi numbers productions starting at 1,
        # presumably so the values index the augmented production list after
        # augment() inserts S' -> S at position 0 (used for reduce actions)
        pi = 1
        self.epslhs = {}
        for p in self.productions:
            if p.RHS == []:
                self.epslhs[p.LHS] = pi
            pi = pi + 1
        # derived epsilon productions
        self.lhsdereps = self._mklhsdereps()
        # the FIRST function for the LR(1) grammar, implemented as a map
        self.firstmap = self._mkfirstmap()

    def _reduceGrammar(self):
        """Definitions:
        (1) not productive
            a nonterminal A is not productive iff there is no
            word u with A ==>* u
            This means A produces no words in the grammar.
        (2) not reachable
            a nonterminal A is not reachable iff there are no words
            a,b with S ==>* aAb
            This means A never occurs in a parsetree if we derive a word.

        This function eliminates all nonterminals which are not productive
        or not reachable.
        If we reduce the start symbol, the grammar produces nothing and
        a ParseException is thrown.

        References: [R. Wilhelm, D. Maurer: "Uebersetzerbau", p. 300f]
        """
        # NOTE(review): this method looks broken as written -- see the
        # individual notes below.  It only works if every production is
        # immediately productive and reachable, so the loops never fire.
        # productive nonterminals
        productive_nts = []
        # rest_nt[p] == the number of nonterminals in p.RHS which are not yet
        # marked as productive
        # if rest_nt[p]==0 then p is productive
        rest_nt = {}
        # if we find a productive nonterminal A, we have to inspect all
        # other nonterminals with A. this is the reason we add all found
        # productive nts to this list
        workedon_nts = []
        # mark terminals as productive (even epsilon-productions)
        for p in self.productions:
            rest_nt[p]= len(filter(lambda x, s=self: x in s.nonterminals, p.RHS))
            if rest_nt[p]==0:
                # NOTE(review): productive_nts is a list; indexing it with a
                # Production raises TypeError.  Presumably
                # productive_nts.append(p.LHS) was intended (the membership
                # tests below compare against LHS strings).
                productive_nts[p] = 1
                workedon_nts.append(p)
        # work on the productive list
        while len(workedon_nts):
            x = workedon_nts[0]
            # search for production p with x in p.RHS
            # NOTE(review): the first round puts Production objects in
            # workedon_nts while later rounds add LHS strings -- the
            # "x in p.RHS" test only makes sense for the strings.
            for p in filter(lambda p, _x=x: _x in p.RHS, self.productions):
                rest_nt[p] = rest_nt[p] - 1
                if not p.LHS in productive_nts:
                    productive_nts.append(p.LHS)
                    workedon_nts.append(p.LHS)
            workedon_nts.remove(x)
        if not self.productions[0].LHS in productive_nts:
            raise PyLRParseError, "start symbol of grammar is not productive"

        # reachable nonterminals
        # NOTE(review): this binds a Production object, yet the loop below
        # uses "in" and .append() on it -- presumably
        # [self.productions[0].LHS] was intended.
        reachable_nts = self.productions[0]
        added=1
        while added:
            added = 0
            for p in self.productions:
                for r in p.RHS:
                    if p.LHS in reachable_nts and (r in self.nonterminals and
                                                   r not in reachable_nts):
                        reachable_nts.append(r)
                        added = 1

        # reduce the grammar: keep only productions whose LHS survived
        self.productions = filter(lambda p,
                                  pnt=productive_nts,
                                  rnt=reachable_nts: p.LHS in pnt or p.LHS in rnt,
                                  self.productions)

    def __repr__(self):
        """I like functional programming :)"""
        # one "LHS: rhs (func);" line per production
        return string.join(map(lambda x,s=self: x.getrep(s.tokens),
                               self.productions),";\n")+";"

    def _mklhsdereps(self):
        """determines the nonterminals that derive nothing (epsilon),
        either immediately (A -> <empty>) or through a chain of unit
        productions (A -> B, B ==>* epsilon).  Returns a map from such
        nonterminals to a 1-based production number."""
        pi = 1
        res = {}
        for p in self.productions:
            if p.RHS == []:
                res[p.LHS] = pi
            pi = pi + 1
        # nonterminals not yet known to derive epsilon
        workingnonterms = []
        for nt in self.nonterminals:
            if not res.has_key(nt):
                workingnonterms.append(nt)
        # propagate through unit productions until a fixpoint is reached
        while 1:
            toremove = []
            for nt in workingnonterms:
                if not res.has_key(nt):
                    for p in self.lhsprods[nt]:
                        if len(p.RHS) == 1 and res.has_key(p.RHS[0]):
                            res[p.LHS] = res[p.RHS[0]]
                            toremove.append(nt)
                            break
            if not toremove:
                break
            for r in toremove:
                workingnonterms.remove(r)
        return res

    def _mkfirstmap(self):
        """return a dictionary keyed by symbol whose values are the set
        of terminals that can begin a string derived from that symbol
        (the FIRST sets)
        """
        res = {}
        # FIRST of a terminal (and of the marker symbols) is itself
        for sym in self.terminals+[Grammar.EPS, Grammar.EOF, Grammar.DummyLA]:
            res[sym] = {sym: 1}
        # iterate to a fixpoint over the nonterminals
        added=1
        while added:
            added = 0
            for nt in self.nonterminals:
                firsts = res.get(nt, {})
                for p in self.lhsprods[nt]:
                    if not p.RHS:
                        # empty production contributes epsilon
                        if not firsts.has_key(Grammar.EPS):
                            added = firsts[Grammar.EPS] = 1
                    for i in range(len(p.RHS)):
                        f = res.get(p.RHS[i], {})
                        for t in f.keys():
                            if not firsts.has_key(t):
                                added = firsts[t] = 1
                        # only continue past RHS[i] if it can derive epsilon
                        if not self.lhsdereps.has_key(p.RHS[i]):
                            break
                res[nt] = firsts
        # flatten the inner dicts (used as sets) into lists
        for s in res.keys():
            res[s] = res[s].keys()
        return res

    # these function are used as the grammar produces the tables (or writes
    # them to a file)
    def firstofstring(self, gs_list):
        """FIRST of a string of grammar symbols: union of the FIRSTs of a
        prefix, stopping at the first symbol that cannot derive epsilon;
        contains EPS only if every symbol in the string can."""
        tmpres = {}
        allhaveeps = 1
        for x in range(len(gs_list)):
            tmp = self.firstmap[gs_list[x]]
            for s in tmp:
                tmpres[s] = 1
            if Grammar.EPS in tmp:
                del tmpres[Grammar.EPS]
            else:
                allhaveeps = 0
                break
        if allhaveeps:
            tmpres[Grammar.EPS] = 1
        return tmpres.keys()

    def augment(self):
        """this function adds a production S' -> S to the grammar where S was
        the start symbol.
        """
        lhss = map(lambda x: x.LHS, self.productions)
        # pick a fresh name by appending apostrophes until it is unused
        newsym = self.productions[0].LHS
        while 1:
            newsym = newsym + "'"
            if newsym not in lhss:
                break
        self.productions.insert(0, Production(newsym,
                                              [self.productions[0].LHS]))

    # follow is not used yet, but probably will be in determining error reporting/recovery
    def follow(self):
        """Compute the FOLLOW sets of the nonterminals: for each nonterminal
        the terminals that can appear immediately after it in a derivation.
        Returns a map from nonterminal to a duplicate-free list."""
        eof = Grammar.EOF
        follow = {}
        startsym = self.productions[0].LHS
        follow[startsym] = [eof]
        nts = self.nonterminals
        # pass 1: FOLLOW(B) gets FIRST of what follows B inside each RHS
        for p in self.productions:
            cutoff = range(len(p.RHS))
            cutoff.reverse()
            for c in cutoff[:-1]: # all but the first of the RHS elements
                f = self.firstmap[p.RHS[c]]
                if Grammar.EPS in f:
                    # NOTE(review): this mutates the shared list stored in
                    # self.firstmap rather than a copy -- confirm intended.
                    f.remove(Grammar.EPS)
                if follow.has_key(p.RHS[c - 1]):
                    if p.RHS[c -1] in nts:
                        follow[p.RHS[c -1]] = follow[p.RHS[c - 1]] + f[:]
                else:
                    if p.RHS[c -1] in nts:
                        follow[p.RHS[c - 1]] = f[:]
        # pass 2: FOLLOW(LHS) flows into the last RHS symbol, and past
        # epsilon-deriving tails
        for p in self.productions:
            if not p.RHS: continue
            cutoff = range(len(p.RHS))
            cutoff.reverse()
            if p.RHS[-1] in nts:
                if follow.has_key(p.LHS):
                    add = follow[p.LHS]
                else:
                    add = []

                if follow.has_key(p.RHS[-1]):
                    follow[p.RHS[-1]] = follow[p.RHS[-1]] + add
                else:
                    follow[p.RHS[-1]] = add
            for c in cutoff[:-1]:
                f = self.firstmap[p.RHS[c]]
                if Grammar.EPS in f:
                    if follow.has_key(p.LHS):
                        add = follow[p.LHS]
                    else:
                        add = []
                    if follow.has_key(p.RHS[c-1]):
                        follow[p.RHS[c-1]] = follow[p.RHS[c-1]] + add
                    elif add:
                        follow[p.RHS[c - 1]] = add
        # deduplicate each FOLLOW list via a temporary dict
        for k in follow.keys():
            d = {}
            for i in follow[k]:
                d[i] = 1
            follow[k] = d.keys()
        return follow

    def closure(self, items):
        """LR(1) closure of a list of items.  An item is
        ((production index, dot position), lookahead terminal)."""
        res = items[:]
        todo = items[:]
        more = 1
        while more:
            more = []
            for (prodind, rhsind), term in todo:
                # dot at the end: nothing to expand
                if rhsind >= len(self.productions[prodind].RHS):
                    continue
                # expand the nonterminal after the dot (terminals yield no
                # productions via lhsprods.get(..., []))
                for p in self.lhsprods.get(self.productions[prodind].RHS[rhsind], []):
                    try:
                        newpart = self.productions[prodind].RHS[rhsind + 1]
                    except IndexError:
                        newpart = Grammar.EPS
                    # lookaheads are FIRST(symbol-after-dot, current lookahead)
                    stringofsyms = [newpart, term]
                    for t in self.firstofstring(stringofsyms):
                        if ((self.productions.index(p), 0), t) not in res:
                            more.append(((self.productions.index(p), 0), t))
                    if term == Grammar.EOF and newpart == Grammar.EPS:
                        if ((self.productions.index(p), 0), Grammar.EOF) not in res:
                            more.append(((self.productions.index(p), 0), Grammar.EOF))
            if more:
                res = res + more
                todo = more
        return res

    def goto(self, items, sym):
        """LR(1) goto: advance the dot over sym in every item that allows
        it, then take the closure of the result."""
        itemset = []
        for (prodind, rhsind), term in items:
            try:
                if self.productions[prodind].RHS[rhsind] == sym and ((prodind, rhsind+1), term) not in itemset:
                    itemset.append( ((prodind, rhsind +1), term))
            except IndexError:
                # dot already at the end of this production
                pass
        return self.closure(itemset)

    def default_prodfunc(self):
        """for mkengine, this will produce a default function for those
        unspecified
        """
        # default semantic action: return the first RHS value unchanged
        return lambda *args: args[0]

    def prodinfotable(self):
        """returns a list of three pieces of info for each production.
        The first is the length of the production, the second is the
        function(name) associated with the production and the third is
        the index of the lhs in a list of nonterminals.
        """
        res = []
        for p in self.productions:
            lhsind = self.nonterminals.index(p.LHS)
            func = p.func
            if not func:
                func = self.default_prodfunc()
            plen = len(p.RHS)
            # an explicit epsilon RHS reduces zero stack entries
            if p.RHS == [Grammar.EPS]:
                plen = 0
            res.append((plen, func, lhsind))
        return res
|
||||
|
||||
|
||||
class LALRGrammar(LR1Grammar):
    """LALR(1) table construction on top of LR1Grammar, using kernel items
    plus spontaneous/propagated lookaheads (the classic Dragon-Book
    algorithm) instead of full LR(1) item sets."""

    def __init__(self, prods, toks=[]):
        # NOTE(review): "Grammar" is not defined in this module; presumably
        # this should be LR1Grammar.__init__ (the base class) -- as written
        # this raises NameError.  Same for every Grammar.EPS/EOF/DummyLA
        # reference below.
        Grammar.__init__(self, prods, toks)
        self.LALRitems = []
        #
        # this is to help make epsilon productions work with kernel items
        # and to compute goto transitions from kernel
        print "computing ntfirsts..."
        self.ntfirstmap = self._mkntfirstmap()
        #
        # this is to help make shifts work with only kernel items
        print "computing tfirsts..."
        self.tfirstmap = self._mktfirstmap()
        #
        # another thing to help epsilon productions
        print "computing follows..."
        self.followmap = self.follow()

    def _mkntfirstmap(self):
        """computes all nonterms A, first of (strings n) such that some
        nonterminal B derives [A, n] in zero or more steps of (rightmost)
        derivation. used to help make epsilon productions quickly calculable.
        (B may == A)
        """
        res = {}
        # seed: direct productions B -> A n with A a nonterminal
        for p in self.productions:
            if p.RHS and p.RHS[0] in self.nonterminals:
                fos = self.firstofstring(p.RHS[1:])
                fos.sort()
                if not res.has_key(p.LHS):
                    res[p.LHS] = {}
                if not res[p.LHS].has_key(p.RHS[0]):
                    res[p.LHS][p.RHS[0]] = []
                for i in fos:
                    if i not in res[p.LHS].get(p.RHS[0], []):
                        res[p.LHS][p.RHS[0]] = fos

        # transitive closure: combine chains B ==> A n, A ==> C m
        while 1:
            foundmore = 0
            reskeys = res.keys()
            for nt in reskeys:
                rhsdict = res[nt]
                for rnt in rhsdict.keys():
                    if rnt in reskeys:
                        d = res[rnt]
                        for k in d.keys():
                            if not res[nt].has_key(k):
                                fos = self.firstofstring(d[k]+ res[nt][rnt])
                                foundmore = 1
                                fos.sort()
                                res[nt][k] = fos
                            else:
                                fos = self.firstofstring(d[k] + res[nt][rnt])
                                fos.sort()
                                if fos != res[nt][k]: # then res[nt][k] is contained in fos
                                    foundmore = 1
                                    res[nt][k] = fos
            if not foundmore:
                break
        #
        # this part accounts for the fact that a nonterminal will
        # produce exactly itself in zero steps
        #
        for p in self.productions:
            if res.has_key(p.LHS):
                res[p.LHS][p.LHS] = [Grammar.EPS]
            else:
                res[p.LHS] = {p.LHS: [Grammar.EPS]}
        return res

    def newmkntfirstmap(self):
        """computes all nonterms A, first of (strings n) such that some
        nonterminal B derives [A, n] in zero or more steps of (rightmost)
        derivation. used to help make epsilon productions quickly calculable.
        (B may == A)

        NOTE(review): apparently an unfinished simplification of
        _mkntfirstmap that records only reachability flags (1) instead of
        FIRST lists; nothing in this file calls it.
        """
        res = {}
        pi = 0
        for p in self.productions:
            if p.RHS and p.RHS[0] in self.nonterminals:
                if not res.has_key(p.LHS):
                    res[p.LHS] = {}
                if not res[p.LHS].has_key(p.RHS[0]):
                    res[p.LHS][p.RHS[0]] = 1
        while 1:
            foundmore = 0
            reskeys = res.keys()
            for nt in reskeys:
                rhsdict = res[nt]
                for rnt in rhsdict.keys():
                    if rnt in reskeys:
                        d = res[rnt]
                        for k in d.keys():
                            if not res[nt].has_key(k):
                                foundmore = 1
                                res[nt][k] = 1
            if not foundmore:
                break
        #
        # this part accounts for the fact that a nonterminal will
        # produce exactly itself in zero steps
        #
        for p in self.productions:
            if res.has_key(p.LHS):
                res[p.LHS][p.LHS] = 1
            else:
                res[p.LHS] = {p.LHS: 1}
        return res

    def _mktfirstmap(self):
        """for each nonterminal C, compute the set of all terminals a, such
        that C derives ax in zero or more steps of (rightmost) derivation
        where the last derivation is not an epsilon (empty) production.

        assumes .mkfirstntmap() has been run and has already produced
        self.ntfirstmap
        """
        res = {}
        # seed with directly leading terminals
        for p in self.productions:
            if not res.has_key(p.LHS):
                res[p.LHS] = []
            if p.RHS and p.RHS[0] in self.terminals:
                res[p.LHS].append(p.RHS[0])
        # propagate along the nonterminal-first relation to a fixpoint
        while 1:
            foundmore = 0
            reskeys = res.keys()
            for nt in self.ntfirstmap.keys():
                arrows = self.ntfirstmap[nt]
                for k in arrows.keys():
                    for t in res[k]:
                        if t not in res[nt]:
                            foundmore = 1
                            res[nt].append(t)
            if not foundmore:
                break
        return res

    def goto(self, itemset, sym):
        """Kernel-only goto: items here are bare (production index, dot)
        pairs without lookaheads (unlike LR1Grammar.goto).  Uses
        ntfirstmap to include items reached through nonterminal chains
        without computing a full closure."""
        res = []
        for (pi, ri) in itemset:
            if ri == len(self.productions[pi].RHS):
                continue
            s = self.productions[pi].RHS[ri]
            if s == sym:
                res.append((pi, ri+1))
            # items from productions of nonterminals derivable from s
            d = self.ntfirstmap.get(s, {})
            for k in d.keys():
                for p in self.lhsprods[k]:
                    if p.RHS and p.RHS[0] == sym:
                        i = self.productions.index(p)
                        if (i, 1) not in res: res.append((i, 1))
        res.sort()
        return res

    def lookaheads(self, itemset):
        """For one kernel item set, determine the spontaneously generated
        lookaheads and the propagation links, by closing each kernel item
        with the dummy lookahead (Dragon-Book Algorithm 4.62).

        Returns (spontaneous, propagates) where spontaneous is a list of
        (target state, item, terminal) and propagates maps a kernel item to
        a list of (target state, item) it propagates its lookaheads to."""
        setsofitems = kernels = self.kernelitems
        spontaneous = []
        propagates = {}
        gotomap = {}   # cache: symbol -> goto state index, per item set
        for (kpi, kri) in itemset:
            C = self.closure([((kpi, kri), Grammar.DummyLA)])
            for (cpi, cri), t in C:
                if (cri) == len(self.productions[cpi].RHS):
                    continue
                s = self.productions[cpi].RHS[cri]
                if gotomap.has_key(s):
                    newstate = gotomap[s]
                else:
                    newstate = setsofitems.index(self.goto(itemset, s))
                    gotomap[s] = newstate
                if t != Grammar.DummyLA:
                    # a concrete lookahead arose: it is spontaneous
                    spontaneous.append((newstate, (cpi, cri+1), t))
                else:
                    # the dummy survived: lookaheads propagate from the
                    # originating kernel item
                    if propagates.has_key((kpi, kri)):
                        propagates[(kpi, kri)].append((newstate, (cpi, cri+1)))
                    else:
                        propagates[(kpi, kri)]=[(newstate, (cpi, cri+1))]
        return spontaneous, propagates

    def kernelsoflalr1items(self):
        """Compute all kernel item sets, starting from the initial kernel
        [(0, 0)] (dot at start of the augmented production) and repeatedly
        applying goto over every grammar symbol until no new sets appear."""
        res = [[(0, 0)]]
        todo = [[(0, 0)]]
        while 1:
            newtodo = []
            for items in todo:
                for s in self.terminals + self.nonterminals + [Grammar.EOF]:
                    g = self.goto(items, s)
                    if g and g not in res:
                        newtodo.append(g)
            if not newtodo:
                break
            else:
                if self.verbose:
                    print "found %d more kernels" % (len(newtodo))
                res = res + newtodo
                todo = newtodo
        res.sort()
        return res

    def initLALR1items(self):
        """Build the kernels, an empty lookahead table shaped like them,
        and the spontaneous/propagated lookahead information for every
        kernel state.  Returns (la_table, props)."""
        self.kernelitems = kernels = self.kernelsoflalr1items()
        props = {}
        la_table = []
        # la_table[state][item] is the list of lookahead terminals
        for x in range(len(kernels)):
            la_table.append([])
            for y in range(len(kernels[x])):
                la_table[x].append([])
        # EOF is the lookahead of the start item
        la_table[0][0] = [Grammar.EOF]
        if self.verbose:
            print "initLALR1items, kernels done, calculating propagations and spontaneous lookaheads"
        state_i = 0
        for itemset in kernels:
            if self.verbose:
                print ".",
            sp, pr = self.lookaheads(itemset)
            for ns, (pi, ri), t in sp:
                inner = kernels[ns].index((pi, ri))
                la_table[ns][inner].append(t)
            props[state_i] = pr
            state_i = state_i + 1
        return la_table, props

    def LALR1items(self):
        """Propagate lookaheads to a fixpoint, then reorganize the result
        into self.LALRitems: per state, a sorted list of
        ((production index, dot), lookahead terminal) pairs."""
        la_table, props = self.initLALR1items()
        if self.verbose:
            print "done init LALR1items"
        soi = self.kernelitems
        # propagation passes until no lookahead is added anywhere
        while 1:
            added_la = 0
            state_i = 0
            for state in la_table:
                ii = 0
                for propterms in state:
                    if not propterms:
                        ii = ii + 1
                        continue
                    item = soi[state_i][ii]
                    ii = ii + 1
                    try:
                        proplist = props[state_i][item]
                    except KeyError:
                        continue
                    for pstate, pitem in proplist:
                        inner = soi[pstate].index(pitem)
                        for pt in propterms:
                            if pt not in la_table[pstate][inner]:
                                added_la = 1
                                la_table[pstate][inner].append(pt)
                state_i = state_i + 1
            if not added_la:
                break
        #
        # this section just reorganizes the above data
        # to the state it's used in later...
        #
        if self.verbose:
            print "done with lalr1items, reorganizing the data"
        res = []
        state_i = 0
        for state in soi:
            item_i = 0
            inner = []
            for item in state:
                for term in la_table[state_i][item_i]:
                    if (item, term) not in inner:
                        inner.append((item, term))
                item_i = item_i + 1
            inner.sort()
            res.append(inner)
            state_i = state_i + 1
        self.LALRitems = res
        return res

    def deriveN(self, nt1, nt2):
        """
        assuming nt1 -> nt2 <some string>, what is <some string>? such that
        we know it as 1) a set of terminals and 2) whether it contains
        Grammar.EPS

        NOTE(review): unimplemented stub -- always returns None.
        """
        pass

    def actiontable(self):
        """Build the LALR action table: one row per state, one column per
        terminal (plus EOF).  Entries are ("s", state), ("r", production),
        ("a", -1) for accept, or the error entry ("", -1).  Conflicting
        entries are reported on stdout and the newest one wins."""
        items = self.LALRitems
        res = []
        state_i = 0
        terms = self.terminals[:]
        terms.append(Grammar.EOF)
        errentry = ("", -1)
        for state in items:
            # NOTE(review): "list" shadows the builtin; kept as-is.
            list = [errentry] * len(terms)
            res.append(list)
            for (prodind, rhsind), term in state:
                if (rhsind ) == len(self.productions[prodind].RHS):
                    # dot at the end: reduce, or accept for the start prod
                    if prodind != 0:
                        new = ("r", prodind)
                        old = res[state_i][terms.index(term)]
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new
                        res[state_i][terms.index(term)] = new
                    else:
                        new = ("a", -1)
                        old = res[state_i][terms.index(term)]
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new
                        res[state_i][terms.index(term)] = new
                #
                # calculate reduction by epsilon productions
                #
                elif self.productions[prodind].RHS[rhsind] in self.nonterminals:
                    nt = self.productions[prodind].RHS[rhsind]
                    ntfirst = self.firstmap[nt]
                    ntfirsts = self.ntfirstmap.get(nt, {})
                    for k in ntfirsts.keys():
                        if self.epslhs.get(k, ""):
                            reduceterms = self.followmap[k]
                            # print `((prodind, rhsind), term)`, reduceterms
                            for r in reduceterms:
                                inner = terms.index(r)
                                old = res[state_i][inner]
                                new = ("r", self.epslhs[k])
                                if old != errentry and old != new:
                                    print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
                                res[state_i][inner] = new
                    #
                    # calculate the shifts that occur but whose normal items aren't in the kernel
                    #
                    tfirsts = self.tfirstmap[nt]
                    for t in tfirsts:
                        inner = terms.index(t)
                        g = self.goto(self.kernelitems[state_i], t)
                        old = res[state_i][inner]
                        try:
                            news = self.kernelitems.index(g)
                        except ValueError:
                            continue
                        new = ("s", news)
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
                        res[state_i][inner] = new
                #
                # compute the rest of the shifts that occur 'normally' in the kernel
                #
                else:
                    t = self.productions[prodind].RHS[rhsind]
                    inner = self.terminals.index(t)
                    gt = self.goto(self.kernelitems[state_i], t)
                    if gt in self.kernelitems:
                        news = self.kernelitems.index(gt)
                        old = res[state_i][inner]
                        new = ("s", news)
                        if old != errentry and old != new:
                            print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
                        res[state_i][inner] = new
            state_i = state_i + 1
        return res

    def gototable(self):
        """Build the goto table: one row per state, one column per
        nonterminal; entries are target state indices or None."""
        items = self.kernelitems
        res = []
        state_i = 0
        nonterms = self.nonterminals
        err = None
        for state in items:
            # NOTE(review): "list" shadows the builtin; kept as-is.
            list = [err] * len(nonterms)
            res.append(list)
            nonterm_i = 0
            for nt in nonterms:
                goto = self.goto(state, nt)
                if goto in items:
                    res[state_i][nonterm_i] = items.index(goto)
                nonterm_i = nonterm_i + 1
            state_i = state_i + 1
        return res

    def mkengine(self, inbufchunksize=None, stackchunksize=None):
        """dynamically will produce a parse engine, just an experiment,
        don't try to use it for anything real.

        NOTE(review): PyLRengine is neither defined nor imported in this
        module -- this raises NameError as written; confirm where the
        engine module lives.
        """
        self.augment()
        self.LALR1items()
        at = self.actiontable()
        gt = self.gototable()
        self.productions = self.productions[1:] # unaugment
        pi = self.prodinfotable()
        if not inbufchunksize:
            inbufchunksize = 50
        if not stackchunksize:
            stackchunksize = 100
        e = PyLRengine.NewEngine(pi, at, gt, inbufchunksize, stackchunksize)
        return e

    def writefile(self, filename, parsername="MyParser", lexerinit = "PyLR.Lexer.Lexer()"):
        """Generate a parser source file: build the tables, then fill the
        template in parsertemplate.__doc__ and write it to filename."""
        self.augment()
        print "About to start LALRitems at %d" % time.time()
        self.LALR1items()
        print "done building LALRitems at %d" % time.time()
        at = self.actiontable()
        print "done building actiontable at %d" % time.time()
        gt = self.gototable()
        print "done building gototable at %d" % time.time()
        self.productions = self.productions[1:]
        pi = self.prodinfotable()
        template = parsertemplate.__doc__
        vals = {"parsername": parsername, "lexerinit": lexerinit}
        vals["date"] = time.ctime(time.time())
        vals["filename"] = filename
        # optional extra source code inserted into the generated module
        if not hasattr(self, "extrasource"):
            vals["extrasource"] = ""
        else:
            vals["extrasource"] = self.extrasource
        vals["grammar"] = `self`
        # render the tables as python source literals
        actiontable_s = "[\n\t"
        for l in at:
            actiontable_s = "%s%s,\n\t" % (actiontable_s, `l`)
        vals["actiontable"] = actiontable_s[:-3] + "\n]\n\n"
        gototable_s = "[\n\t"
        for l in gt:
            gototable_s = "%s%s,\n\t" % (gototable_s, `l`)
        vals["gototable"] = gototable_s[:-3] + "\n]\n\n"
        pi_s = "[\n\t"
        pii = 0
        vals["symbols"] = `self.tokens`
        # NOTE(review): prod2func_s is built but never used below.
        prod2func_s = "Production" + " " * 45 + "Method Name\n"
        for l, f, e in pi:
            pi_s = "%s(%d, '%s', %d),%s# %s\n\t" % (pi_s,
                                                    l,
                                                    self.productions[pii].funcname,
                                                    e,
                                                    " " * (18 - len(self.productions[pii].funcname)),
                                                    `self.productions[pii]` )
            pii = pii + 1
        vals["prodinfo"] = pi_s + "]\n\n"
        fp = open(filename, "w")
        fp.write(template % vals)
        fp.close()
|
||||
|
||||
|
||||
def _makeprod(x):
    """Build a Production from a 2- or 3-tuple (lhs, rhs[, funcname])."""
    if len(x) not in (2, 3):
        raise AttributeError("Invalid Production initializer")
    return apply(Production, tuple(x))
||||
|
||||
def _bootstrap():
|
||||
# dang, how did Scott bootstrap the GrammarParser??
|
||||
# have to make this by hand
|
||||
import Lexers
|
||||
|
||||
# define the productions
|
||||
toks = Lexers.GrammarLex().getTokenList()
|
||||
prods = map(_makeprod,
|
||||
[("pspec", ["gspec"]),
|
||||
("pspec", ["pydefs", "gspec"]),
|
||||
("gspec", [toks.index("GDEL"), "lhsdeflist", toks.index("GDEL")]),
|
||||
("pydefs", ["pydefs", "pydef"]),
|
||||
("pydefs", ["pydef"]),
|
||||
("pydef", [toks.index("LEX")], "lexdef"),
|
||||
("pydef", [toks.index("CODE")], "addcode"),
|
||||
("pydef", [toks.index("CLASS")], "classname"),
|
||||
("lhsdeflist", ["lhsdeflist", "lhsdef"]),
|
||||
("lhsdeflist", ["lhsdef"]),
|
||||
("lhsdef", [toks.index("ID"), toks.index("COLON"), "rhslist", toks.index("SCOLON")], "lhsdef"),
|
||||
("rhslist", ["rhs"], "singletolist"),
|
||||
("rhslist", ["rhslist", toks.index("OR"), "rhs"], "rhslist_OR_rhs"),
|
||||
("rhs", ["rhsidlist"], "rhs_idlist"),
|
||||
("rhs", ["rhsidlist", toks.index("LPAREN"), toks.index("ID"), toks.index("RPAREN")], "rhs_idlist_func"),
|
||||
("rhsidlist", ["idlist"]),
|
||||
("rhsidlist", [], "rhseps"),
|
||||
("idlist", ["idlist", toks.index("ID")], "idl_idlistID"),
|
||||
("idlist", [toks.index("ID")], "idlistID")])
|
||||
print string.join(map(lambda x: str(x), prods), "\n")
|
||||
g = LALRGrammar(prods, toks)
|
||||
|
||||
# g.extrasources = "import PyLR.Parsers"
|
||||
# produce the parser
|
||||
g.writefile("./Parsers/GrammarParser.py", "GrammarParser", "PyLR.Lexers.GrammarLex()")
|
||||
|
||||
if __name__=='__main__':
|
||||
_bootstrap()
|
||||
|
||||
77
PyLR/Lexer.py
Normal file
77
PyLR/Lexer.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import re, string, StringUtil
|
||||
|
||||
__version__ = "$Id$"
|
||||
|
||||
class PyLRSyntaxError(SyntaxError):
    """Raised by Lexer.scan when no registered pattern matches the input."""
    pass


# Flag bit for addpat(): the matched text (e.g. whitespace, comments,
# '\n') is consumed but not reported to the parser as a grammar token.
SKIPTOK = 0x01


class Lexer:
    """
    This is a lexer class for PyLR.

    Upon matching text, it must execute a function which will cause it
    to return a 2-tuple of type (tok, val) where token is an integer and
    val is just any python object that will later be passed as an argument
    to the functions that the parser will call when it reduces. For Example

    for the grammar

        E -> E + T
        E -> T
        T -> T * F
        T -> F
        F -> ( E )
        F -> id

    it is likely that the lexer should return the token value of id <tok> and
    the integer value of id (string.atoi(id)).

    In addition, the lexer must always return (eof, something else) when it's
    done scanning to get the parser to continue to be called until parsing is
    done.
    """

    def __init__(self):
        # slot 0 is reserved for the EOF pseudo-token, so the token
        # numbers handed out by scan() start at 1
        self.toklist = [("EOF", None, None, 0)]
        self.settext("")

    def settext(self, t):
        """Set the text to be scanned and restart at its beginning."""
        self.text = t
        self.rewind()

    def getTokenList(self):
        """return list of token names"""
        return map(lambda x: x[0], self.toklist)

    def rewind(self):
        """Reset the scan position to the start of the text."""
        self.textindex = 0

    def addpat(self, pat, tokname=None, func=None, flags=0):
        """add search pattern to the lexer

        func, if given, maps the match object to the token's value;
        flags may include SKIPTOK to suppress the token entirely.
        """
        self.toklist.append((tokname, re.compile(pat), func, flags))

    def __str__(self):
        return string.join(map(lambda x: str(x[0]) + ": " + str(x[1]), self.toklist), "\n")

    def scan(self, verbose=0):
        """Return the next (tokennumber, value) pair, or (0, "EOF") at
        end of input.  Raises PyLRSyntaxError when no pattern matches.

        Skipped tokens (SKIPTOK) are handled iteratively instead of by
        the original self-recursion, so a long run of whitespace or
        comments can no longer exhaust the interpreter stack.
        """
        while 1:
            if self.textindex >= len(self.text):
                if verbose: print("EOF")
                return (0, "EOF")
            for i in range(1, len(self.toklist)):
                tok = self.toklist[i]
                mo = tok[1].match(self.text, self.textindex)
                if mo is None:  # could be the empty string
                    continue
                self.textindex = self.textindex + len(mo.group(0))
                if tok[3] & SKIPTOK:
                    break  # token is skipped: rescan from the new position
                if tok[2]:
                    val = tok[2](mo)
                else:
                    val = mo.group(0)
                if verbose: print(str(i) + ", " + str(val))
                return (i, val)
            else:
                # no pattern matched at the current position
                raise PyLRSyntaxError("line " +
                                      repr(StringUtil.getLineNumber(self.text, self.textindex)) +
                                      ", near \"" + self.text[self.textindex:self.textindex + 10] + "\"")
31
PyLR/Lexers/GrammarLex.py
Normal file
31
PyLR/Lexers/GrammarLex.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
"""
|
||||
this file contains the Lexer that is used in parsing Grammar specifications
|
||||
"""
|
||||
|
||||
import re,Lexer
|
||||
|
||||
def retlex(match):
    """Value function for _lex directives: return the directive payload."""
    return match.group("lex")


def retcode(match):
    """Value function for _code directives: return the directive payload."""
    return match.group("code")


def retclass(match):
    """Value function for _class directives: return the declared class name."""
    return match.group("class")
||||
class GrammarLex(Lexer.Lexer):
    """Lexer for PyLR grammar-specification files.

    Recognizes the _lex/_code/_class directives, the grammar
    punctuation, identifiers and the triple-quote grammar delimiter;
    comments and whitespace are consumed but not reported.
    """

    def __init__(self):
        Lexer.Lexer.__init__(self)
        # Registration order matters: it fixes the token numbers that
        # the generated parser tables refer to.
        table = (
            (r"_lex\s+(?P<lex>[^\n]*)", "LEX", retlex, 0),
            (r"_code\s+(?P<code>[^\n]*)", "CODE", retcode, 0),
            (r"_class\s+(?P<class>[a-zA-Z_][a-zA-Z_0-9]*)", "CLASS", retclass, 0),
            (r"[a-zA-Z_][a-zA-Z_0-9]*", "ID", None, 0),
            (r":", "COLON", None, 0),
            (r";", "SCOLON", None, 0),
            (r"\|", "OR", None, 0),
            (r"\(", "LPAREN", None, 0),
            (r"\)", "RPAREN", None, 0),
            (r'"""', "GDEL", None, 0),
            (r"\s*#[^\n]*", "", None, Lexer.SKIPTOK),
            (r"\s+", "", None, Lexer.SKIPTOK),
        )
        for pat, tokname, func, flags in table:
            self.addpat(pat, tokname, func, flags)
||||
5
PyLR/Lexers/__init__.py
Normal file
5
PyLR/Lexers/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
|
||||
from GrammarLex import GrammarLex
|
||||
|
||||
|
||||
|
||||
15
PyLR/Lexers/mathlex.py
Normal file
15
PyLR/Lexers/mathlex.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
import Lexer, re, string
|
||||
|
||||
def idfunc(match):
    """Convert the text of a matched integer literal to an int."""
    return int(match.group(0))
||||
class mathlex(Lexer.Lexer):
    """Lexer for simple arithmetic expressions: integers, +, *, parens."""

    def __init__(self):
        Lexer.Lexer.__init__(self)
        # Registration order matters: it fixes the token numbers that
        # the parser tables refer to.
        for pat, tokname, func, flags in (
                (r"([1-9]([0-9]+)?)|0", "ID", idfunc, 0),
                (r"\+", "PLUS", None, 0),
                (r"\*", "TIMES", None, 0),
                (r"\(", "LPAREN", None, 0),
                (r"\)", "RPAREN", None, 0),
                (r"\s+", "", None, Lexer.SKIPTOK)):
            self.addpat(pat, tokname, func, flags)
|
||||
319
PyLR/Makefile
Normal file
319
PyLR/Makefile
Normal file
|
|
@ -0,0 +1,319 @@
|
|||
# Generated automatically from Makefile.pre by makesetup.
|
||||
# Generated automatically from Makefile.pre.in by sedscript.
|
||||
# Universal Unix Makefile for Python extensions
|
||||
# =============================================
|
||||
|
||||
# Short Instructions
|
||||
# ------------------
|
||||
|
||||
# 1. Build and install Python (1.5 or newer).
|
||||
# 2. "make -f Makefile.pre.in boot"
|
||||
# 3. "make"
|
||||
# You should now have a shared library.
|
||||
|
||||
# Long Instructions
|
||||
# -----------------
|
||||
|
||||
# Build *and install* the basic Python 1.5 distribution. See the
|
||||
# Python README for instructions. (This version of Makefile.pre.in
|
||||
# only works with Python 1.5, alpha 3 or newer.)
|
||||
|
||||
# Create a file Setup.in for your extension. This file follows the
|
||||
# format of the Modules/Setup.in file; see the instructions there.
|
||||
# For a simple module called "spam" on file "spammodule.c", it can
|
||||
# contain a single line:
|
||||
# spam spammodule.c
|
||||
# You can build as many modules as you want in the same directory --
|
||||
# just have a separate line for each of them in the Setup.in file.
|
||||
|
||||
# If you want to build your extension as a shared library, insert a
|
||||
# line containing just the string
|
||||
# *shared*
|
||||
# at the top of your Setup.in file.
|
||||
|
||||
# Note that the build process copies Setup.in to Setup, and then works
|
||||
# with Setup. It doesn't overwrite Setup when Setup.in is changed, so
|
||||
# while you're in the process of debugging your Setup.in file, you may
|
||||
# want to edit Setup instead, and copy it back to Setup.in later.
|
||||
# (All this is done so you can distribute your extension easily and
|
||||
# someone else can select the modules they actually want to build by
|
||||
# commenting out lines in the Setup file, without editing the
|
||||
# original. Editing Setup is also used to specify nonstandard
|
||||
# locations for include or library files.)
|
||||
|
||||
# Copy this file (Misc/Makefile.pre.in) to the directory containing
|
||||
# your extension.
|
||||
|
||||
# Run "make -f Makefile.pre.in boot". This creates Makefile
|
||||
# (producing Makefile.pre and sedscript as intermediate files) and
|
||||
# config.c, incorporating the values for sys.prefix, sys.exec_prefix
|
||||
# and sys.version from the installed Python binary. For this to work,
|
||||
# the python binary must be on your path. If this fails, try
|
||||
# make -f Makefile.pre.in Makefile VERSION=1.5 installdir=<prefix>
|
||||
# where <prefix> is the prefix used to install Python for installdir
|
||||
# (and possibly similar for exec_installdir=<exec_prefix>).
|
||||
|
||||
# Note: "make boot" implies "make clobber" -- it assumes that when you
|
||||
# bootstrap you may have changed platforms so it removes all previous
|
||||
# output files.
|
||||
|
||||
# If you are building your extension as a shared library (your
|
||||
# Setup.in file starts with *shared*), run "make" or "make sharedmods"
|
||||
# to build the shared library files. If you are building a statically
|
||||
# linked Python binary (the only solution if your platform doesn't
|
||||
# support shared libraries, and sometimes handy if you want to
|
||||
# distribute or install the resulting Python binary), run "make
|
||||
# python".
|
||||
|
||||
# Note: Each time you edit Makefile.pre.in or Setup, you must run
|
||||
# "make Makefile" before running "make".
|
||||
|
||||
# Hint: if you want to use VPATH, you can start in an empty
|
||||
# subdirectory and say (e.g.):
|
||||
# make -f ../Makefile.pre.in boot srcdir=.. VPATH=..
|
||||
|
||||
|
||||
# === Bootstrap variables (edited through "make boot") ===
|
||||
|
||||
# The prefix used by "make inclinstall libainstall" of core python
|
||||
installdir= /usr
|
||||
|
||||
# The exec_prefix used by the same
|
||||
exec_installdir=/usr
|
||||
|
||||
# Source directory and VPATH in case you want to use VPATH.
|
||||
# (You will have to edit these two lines yourself -- there is no
|
||||
# automatic support as the Makefile is not generated by
|
||||
# config.status.)
|
||||
srcdir= .
|
||||
VPATH= .
|
||||
|
||||
# === Variables that you may want to customize (rarely) ===
|
||||
|
||||
# (Static) build target
|
||||
TARGET= python
|
||||
|
||||
# Installed python binary (used only by boot target)
|
||||
PYTHON= python
|
||||
|
||||
# Add more -I and -D options here
|
||||
CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS)
|
||||
|
||||
# These two variables can be set in Setup to merge extensions.
|
||||
# See example[23].
|
||||
BASELIB=
|
||||
BASESETUP=
|
||||
|
||||
# === Variables set by makesetup ===
|
||||
|
||||
MODOBJS=
|
||||
MODLIBS= $(LOCALMODLIBS) $(BASEMODLIBS)
|
||||
|
||||
# === Definitions added by makesetup ===
|
||||
|
||||
LOCALMODLIBS=
|
||||
BASEMODLIBS=
|
||||
SHAREDMODS= PyLRenginemodule$(SO)
|
||||
TKPATH=:lib-tk
|
||||
GLHACK=-Dclear=__GLclear
|
||||
PYTHONPATH=$(COREPYTHONPATH)
|
||||
COREPYTHONPATH=$(DESTPATH)$(SITEPATH)$(MACHDEPPATH)$(STDWINPATH)$(TKPATH)
|
||||
MACHDEPPATH=:plat-$(MACHDEP)
|
||||
TESTPATH=
|
||||
SITEPATH=
|
||||
DESTPATH=
|
||||
MACHDESTLIB=$(BINLIBDEST)
|
||||
DESTLIB=$(LIBDEST)
|
||||
|
||||
|
||||
# === Variables from configure (through sedscript) ===
|
||||
|
||||
VERSION= 1.5
|
||||
CC= gcc
|
||||
LINKCC= $(CC)
|
||||
SGI_ABI= @SGI_ABI@
|
||||
OPT= -g -O2
|
||||
LDFLAGS=
|
||||
DEFS= -DHAVE_CONFIG_H
|
||||
LIBS= -lieee -ldl -lpthread
|
||||
LIBM= -lm
|
||||
LIBC=
|
||||
RANLIB= ranlib
|
||||
MACHDEP= linux2
|
||||
SO= .so
|
||||
LDSHARED= gcc -shared -lc
|
||||
CCSHARED= -fPIC
|
||||
LINKFORSHARED= -Xlinker -export-dynamic
|
||||
CCC=g++
|
||||
|
||||
# Install prefix for architecture-independent files
|
||||
prefix= /usr
|
||||
|
||||
# Install prefix for architecture-dependent files
|
||||
exec_prefix= ${prefix}
|
||||
|
||||
# === Fixed definitions ===
|
||||
|
||||
# Shell used by make (some versions default to the login shell, which is bad)
|
||||
SHELL= /bin/sh
|
||||
|
||||
# Expanded directories
|
||||
BINDIR= $(exec_installdir)/bin
|
||||
LIBDIR= $(exec_prefix)/lib
|
||||
MANDIR= $(installdir)/man
|
||||
INCLUDEDIR= $(installdir)/include
|
||||
SCRIPTDIR= $(prefix)/lib
|
||||
|
||||
# Detailed destination directories
|
||||
BINLIBDEST= $(LIBDIR)/python$(VERSION)
|
||||
LIBDEST= $(SCRIPTDIR)/python$(VERSION)
|
||||
INCLUDEPY= $(INCLUDEDIR)/python$(VERSION)
|
||||
EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION)
|
||||
LIBP= $(exec_installdir)/lib/python$(VERSION)
|
||||
DESTSHARED= $(BINLIBDEST)/site-packages
|
||||
|
||||
LIBPL= $(LIBP)/config
|
||||
|
||||
PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a
|
||||
|
||||
MAKESETUP= $(LIBPL)/makesetup
|
||||
MAKEFILE= $(LIBPL)/Makefile
|
||||
CONFIGC= $(LIBPL)/config.c
|
||||
CONFIGCIN= $(LIBPL)/config.c.in
|
||||
SETUP= $(LIBPL)/Setup
|
||||
|
||||
SYSLIBS= $(LIBM) $(LIBC)
|
||||
|
||||
ADDOBJS= $(LIBPL)/python.o config.o
|
||||
|
||||
# Portable install script (configure doesn't always guess right)
|
||||
INSTALL= $(LIBPL)/install-sh -c
|
||||
# Shared libraries must be installed with executable mode on some systems;
|
||||
# rather than figuring out exactly which, we always give them executable mode.
|
||||
# Also, making them read-only seems to be a good idea...
|
||||
INSTALL_SHARED= ${INSTALL} -m 555
|
||||
|
||||
# === Fixed rules ===
|
||||
|
||||
# Default target. This builds shared libraries only
|
||||
default: sharedmods
|
||||
|
||||
# Build everything
|
||||
all: static sharedmods
|
||||
|
||||
# Build shared libraries from our extension modules
|
||||
sharedmods: $(SHAREDMODS)
|
||||
|
||||
# Build a static Python binary containing our extension modules
|
||||
static: $(TARGET)
|
||||
$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB)
|
||||
$(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \
|
||||
$(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \
|
||||
-o $(TARGET)
|
||||
|
||||
install: sharedmods
|
||||
if test ! -d $(DESTSHARED) ; then \
|
||||
mkdir $(DESTSHARED) ; else true ; fi
|
||||
-for i in X $(SHAREDMODS); do \
|
||||
if test $$i != X; \
|
||||
then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
# Build the library containing our extension modules
|
||||
lib.a: $(MODOBJS)
|
||||
-rm -f lib.a
|
||||
ar cr lib.a $(MODOBJS)
|
||||
-$(RANLIB) lib.a
|
||||
|
||||
# This runs makesetup *twice* to use the BASESETUP definition from Setup
|
||||
config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP)
|
||||
$(MAKESETUP) \
|
||||
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
|
||||
$(MAKE) -f Makefile do-it-again
|
||||
|
||||
# Internal target to run makesetup for the second time
|
||||
do-it-again:
|
||||
$(MAKESETUP) \
|
||||
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
|
||||
|
||||
# Make config.o from the config.c created by makesetup
|
||||
config.o: config.c
|
||||
$(CC) $(CFLAGS) -c config.c
|
||||
|
||||
# Setup is copied from Setup.in *only* if it doesn't yet exist
|
||||
Setup:
|
||||
cp $(srcdir)/Setup.in Setup
|
||||
|
||||
# Make the intermediate Makefile.pre from Makefile.pre.in
|
||||
Makefile.pre: Makefile.pre.in sedscript
|
||||
sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre
|
||||
|
||||
# Shortcuts to make the sed arguments on one line
|
||||
P=prefix
|
||||
E=exec_prefix
|
||||
H=Generated automatically from Makefile.pre.in by sedscript.
|
||||
L=LINKFORSHARED
|
||||
|
||||
# Make the sed script used to create Makefile.pre from Makefile.pre.in
|
||||
sedscript: $(MAKEFILE)
|
||||
sed -n \
|
||||
-e '1s/.*/1i\\/p' \
|
||||
-e '2s%.*%# $H%p' \
|
||||
-e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \
|
||||
-e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \
|
||||
-e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \
|
||||
-e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \
|
||||
-e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \
|
||||
-e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \
|
||||
-e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \
|
||||
-e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \
|
||||
-e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \
|
||||
-e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \
|
||||
-e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \
|
||||
-e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \
|
||||
-e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \
|
||||
-e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \
|
||||
-e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \
|
||||
-e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \
|
||||
-e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \
|
||||
-e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \
|
||||
$(MAKEFILE) >sedscript
|
||||
echo "/^CCC=g++/d" >>sedscript
|
||||
echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript
|
||||
echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript
|
||||
echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript
|
||||
echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript
|
||||
echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript
|
||||
echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript
|
||||
echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript
|
||||
|
||||
# Bootstrap target
|
||||
boot: clobber
|
||||
VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \
|
||||
installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \
|
||||
exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \
|
||||
$(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \
|
||||
VERSION=$$VERSION \
|
||||
installdir=$$installdir \
|
||||
exec_installdir=$$exec_installdir \
|
||||
Makefile
|
||||
|
||||
# Handy target to remove intermediate files and backups
|
||||
clean:
|
||||
-rm -f *.o *~
|
||||
|
||||
# Handy target to remove everything that is easily regenerated
|
||||
clobber: clean
|
||||
-rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript
|
||||
-rm -f *.so *.sl so_locations
|
||||
|
||||
|
||||
# Handy target to remove everything you don't want to distribute
|
||||
distclean: clobber
|
||||
-rm -f Makefile Setup
|
||||
|
||||
# Rules appended by makedepend
|
||||
|
||||
PyLRenginemodule.o: $(srcdir)/PyLRenginemodule.c; $(CC) $(CCSHARED) $(CFLAGS) -c $(srcdir)/PyLRenginemodule.c
|
||||
PyLRenginemodule$(SO): PyLRenginemodule.o; $(LDSHARED) PyLRenginemodule.o -o PyLRenginemodule$(SO)
|
||||
298
PyLR/Makefile.pre
Normal file
298
PyLR/Makefile.pre
Normal file
|
|
@ -0,0 +1,298 @@
|
|||
# Generated automatically from Makefile.pre.in by sedscript.
|
||||
# Universal Unix Makefile for Python extensions
|
||||
# =============================================
|
||||
|
||||
# Short Instructions
|
||||
# ------------------
|
||||
|
||||
# 1. Build and install Python (1.5 or newer).
|
||||
# 2. "make -f Makefile.pre.in boot"
|
||||
# 3. "make"
|
||||
# You should now have a shared library.
|
||||
|
||||
# Long Instructions
|
||||
# -----------------
|
||||
|
||||
# Build *and install* the basic Python 1.5 distribution. See the
|
||||
# Python README for instructions. (This version of Makefile.pre.in
|
||||
# only works with Python 1.5, alpha 3 or newer.)
|
||||
|
||||
# Create a file Setup.in for your extension. This file follows the
|
||||
# format of the Modules/Setup.in file; see the instructions there.
|
||||
# For a simple module called "spam" on file "spammodule.c", it can
|
||||
# contain a single line:
|
||||
# spam spammodule.c
|
||||
# You can build as many modules as you want in the same directory --
|
||||
# just have a separate line for each of them in the Setup.in file.
|
||||
|
||||
# If you want to build your extension as a shared library, insert a
|
||||
# line containing just the string
|
||||
# *shared*
|
||||
# at the top of your Setup.in file.
|
||||
|
||||
# Note that the build process copies Setup.in to Setup, and then works
|
||||
# with Setup. It doesn't overwrite Setup when Setup.in is changed, so
|
||||
# while you're in the process of debugging your Setup.in file, you may
|
||||
# want to edit Setup instead, and copy it back to Setup.in later.
|
||||
# (All this is done so you can distribute your extension easily and
|
||||
# someone else can select the modules they actually want to build by
|
||||
# commenting out lines in the Setup file, without editing the
|
||||
# original. Editing Setup is also used to specify nonstandard
|
||||
# locations for include or library files.)
|
||||
|
||||
# Copy this file (Misc/Makefile.pre.in) to the directory containing
|
||||
# your extension.
|
||||
|
||||
# Run "make -f Makefile.pre.in boot". This creates Makefile
|
||||
# (producing Makefile.pre and sedscript as intermediate files) and
|
||||
# config.c, incorporating the values for sys.prefix, sys.exec_prefix
|
||||
# and sys.version from the installed Python binary. For this to work,
|
||||
# the python binary must be on your path. If this fails, try
|
||||
# make -f Makefile.pre.in Makefile VERSION=1.5 installdir=<prefix>
|
||||
# where <prefix> is the prefix used to install Python for installdir
|
||||
# (and possibly similar for exec_installdir=<exec_prefix>).
|
||||
|
||||
# Note: "make boot" implies "make clobber" -- it assumes that when you
|
||||
# bootstrap you may have changed platforms so it removes all previous
|
||||
# output files.
|
||||
|
||||
# If you are building your extension as a shared library (your
|
||||
# Setup.in file starts with *shared*), run "make" or "make sharedmods"
|
||||
# to build the shared library files. If you are building a statically
|
||||
# linked Python binary (the only solution if your platform doesn't
|
||||
# support shared libraries, and sometimes handy if you want to
|
||||
# distribute or install the resulting Python binary), run "make
|
||||
# python".
|
||||
|
||||
# Note: Each time you edit Makefile.pre.in or Setup, you must run
|
||||
# "make Makefile" before running "make".
|
||||
|
||||
# Hint: if you want to use VPATH, you can start in an empty
|
||||
# subdirectory and say (e.g.):
|
||||
# make -f ../Makefile.pre.in boot srcdir=.. VPATH=..
|
||||
|
||||
|
||||
# === Bootstrap variables (edited through "make boot") ===
|
||||
|
||||
# The prefix used by "make inclinstall libainstall" of core python
|
||||
installdir= /usr
|
||||
|
||||
# The exec_prefix used by the same
|
||||
exec_installdir=/usr
|
||||
|
||||
# Source directory and VPATH in case you want to use VPATH.
|
||||
# (You will have to edit these two lines yourself -- there is no
|
||||
# automatic support as the Makefile is not generated by
|
||||
# config.status.)
|
||||
srcdir= .
|
||||
VPATH= .
|
||||
|
||||
# === Variables that you may want to customize (rarely) ===
|
||||
|
||||
# (Static) build target
|
||||
TARGET= python
|
||||
|
||||
# Installed python binary (used only by boot target)
|
||||
PYTHON= python
|
||||
|
||||
# Add more -I and -D options here
|
||||
CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS)
|
||||
|
||||
# These two variables can be set in Setup to merge extensions.
|
||||
# See example[23].
|
||||
BASELIB=
|
||||
BASESETUP=
|
||||
|
||||
# === Variables set by makesetup ===
|
||||
|
||||
MODOBJS= _MODOBJS_
|
||||
MODLIBS= _MODLIBS_
|
||||
|
||||
# === Definitions added by makesetup ===
|
||||
|
||||
# === Variables from configure (through sedscript) ===
|
||||
|
||||
VERSION= 1.5
|
||||
CC= gcc
|
||||
LINKCC= $(CC)
|
||||
SGI_ABI= @SGI_ABI@
|
||||
OPT= -g -O2
|
||||
LDFLAGS=
|
||||
DEFS= -DHAVE_CONFIG_H
|
||||
LIBS= -lieee -ldl -lpthread
|
||||
LIBM= -lm
|
||||
LIBC=
|
||||
RANLIB= ranlib
|
||||
MACHDEP= linux2
|
||||
SO= .so
|
||||
LDSHARED= gcc -shared -lc
|
||||
CCSHARED= -fPIC
|
||||
LINKFORSHARED= -Xlinker -export-dynamic
|
||||
CCC=g++
|
||||
|
||||
# Install prefix for architecture-independent files
|
||||
prefix= /usr
|
||||
|
||||
# Install prefix for architecture-dependent files
|
||||
exec_prefix= ${prefix}
|
||||
|
||||
# === Fixed definitions ===
|
||||
|
||||
# Shell used by make (some versions default to the login shell, which is bad)
|
||||
SHELL= /bin/sh
|
||||
|
||||
# Expanded directories
|
||||
BINDIR= $(exec_installdir)/bin
|
||||
LIBDIR= $(exec_prefix)/lib
|
||||
MANDIR= $(installdir)/man
|
||||
INCLUDEDIR= $(installdir)/include
|
||||
SCRIPTDIR= $(prefix)/lib
|
||||
|
||||
# Detailed destination directories
|
||||
BINLIBDEST= $(LIBDIR)/python$(VERSION)
|
||||
LIBDEST= $(SCRIPTDIR)/python$(VERSION)
|
||||
INCLUDEPY= $(INCLUDEDIR)/python$(VERSION)
|
||||
EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION)
|
||||
LIBP= $(exec_installdir)/lib/python$(VERSION)
|
||||
DESTSHARED= $(BINLIBDEST)/site-packages
|
||||
|
||||
LIBPL= $(LIBP)/config
|
||||
|
||||
PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a
|
||||
|
||||
MAKESETUP= $(LIBPL)/makesetup
|
||||
MAKEFILE= $(LIBPL)/Makefile
|
||||
CONFIGC= $(LIBPL)/config.c
|
||||
CONFIGCIN= $(LIBPL)/config.c.in
|
||||
SETUP= $(LIBPL)/Setup
|
||||
|
||||
SYSLIBS= $(LIBM) $(LIBC)
|
||||
|
||||
ADDOBJS= $(LIBPL)/python.o config.o
|
||||
|
||||
# Portable install script (configure doesn't always guess right)
|
||||
INSTALL= $(LIBPL)/install-sh -c
|
||||
# Shared libraries must be installed with executable mode on some systems;
|
||||
# rather than figuring out exactly which, we always give them executable mode.
|
||||
# Also, making them read-only seems to be a good idea...
|
||||
INSTALL_SHARED= ${INSTALL} -m 555
|
||||
|
||||
# === Fixed rules ===
|
||||
|
||||
# Default target. This builds shared libraries only
|
||||
default: sharedmods
|
||||
|
||||
# Build everything
|
||||
all: static sharedmods
|
||||
|
||||
# Build shared libraries from our extension modules
|
||||
sharedmods: $(SHAREDMODS)
|
||||
|
||||
# Build a static Python binary containing our extension modules
|
||||
static: $(TARGET)
|
||||
$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB)
|
||||
$(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \
|
||||
$(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \
|
||||
-o $(TARGET)
|
||||
|
||||
install: sharedmods
|
||||
if test ! -d $(DESTSHARED) ; then \
|
||||
mkdir $(DESTSHARED) ; else true ; fi
|
||||
-for i in X $(SHAREDMODS); do \
|
||||
if test $$i != X; \
|
||||
then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
# Build the library containing our extension modules
|
||||
lib.a: $(MODOBJS)
|
||||
-rm -f lib.a
|
||||
ar cr lib.a $(MODOBJS)
|
||||
-$(RANLIB) lib.a
|
||||
|
||||
# This runs makesetup *twice* to use the BASESETUP definition from Setup
|
||||
config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP)
|
||||
$(MAKESETUP) \
|
||||
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
|
||||
$(MAKE) -f Makefile do-it-again
|
||||
|
||||
# Internal target to run makesetup for the second time
|
||||
do-it-again:
|
||||
$(MAKESETUP) \
|
||||
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
|
||||
|
||||
# Make config.o from the config.c created by makesetup
|
||||
config.o: config.c
|
||||
$(CC) $(CFLAGS) -c config.c
|
||||
|
||||
# Setup is copied from Setup.in *only* if it doesn't yet exist
|
||||
Setup:
|
||||
cp $(srcdir)/Setup.in Setup
|
||||
|
||||
# Make the intermediate Makefile.pre from Makefile.pre.in
|
||||
Makefile.pre: Makefile.pre.in sedscript
|
||||
sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre
|
||||
|
||||
# Shortcuts to make the sed arguments on one line
|
||||
P=prefix
|
||||
E=exec_prefix
|
||||
H=Generated automatically from Makefile.pre.in by sedscript.
|
||||
L=LINKFORSHARED
|
||||
|
||||
# Make the sed script used to create Makefile.pre from Makefile.pre.in
|
||||
sedscript: $(MAKEFILE)
|
||||
sed -n \
|
||||
-e '1s/.*/1i\\/p' \
|
||||
-e '2s%.*%# $H%p' \
|
||||
-e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \
|
||||
-e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \
|
||||
-e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \
|
||||
-e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \
|
||||
-e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \
|
||||
-e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \
|
||||
-e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \
|
||||
-e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \
|
||||
-e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \
|
||||
-e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \
|
||||
-e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \
|
||||
-e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \
|
||||
-e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \
|
||||
-e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \
|
||||
-e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \
|
||||
-e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \
|
||||
-e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \
|
||||
-e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \
|
||||
$(MAKEFILE) >sedscript
|
||||
echo "/^CCC=g++/d" >>sedscript
|
||||
echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript
|
||||
echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript
|
||||
echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript
|
||||
echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript
|
||||
echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript
|
||||
echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript
|
||||
echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript
|
||||
|
||||
# Bootstrap target
|
||||
boot: clobber
|
||||
VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \
|
||||
installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \
|
||||
exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \
|
||||
$(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \
|
||||
VERSION=$$VERSION \
|
||||
installdir=$$installdir \
|
||||
exec_installdir=$$exec_installdir \
|
||||
Makefile
|
||||
|
||||
# Handy target to remove intermediate files and backups
|
||||
clean:
|
||||
-rm -f *.o *~
|
||||
|
||||
# Handy target to remove everything that is easily regenerated
|
||||
clobber: clean
|
||||
-rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript
|
||||
-rm -f *.so *.sl so_locations
|
||||
|
||||
|
||||
# Handy target to remove everything you don't want to distribute
|
||||
distclean: clobber
|
||||
-rm -f Makefile Setup
|
||||
297
PyLR/Makefile.pre.in
Normal file
297
PyLR/Makefile.pre.in
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
# Universal Unix Makefile for Python extensions
|
||||
# =============================================
|
||||
|
||||
# Short Instructions
|
||||
# ------------------
|
||||
|
||||
# 1. Build and install Python (1.5 or newer).
|
||||
# 2. "make -f Makefile.pre.in boot"
|
||||
# 3. "make"
|
||||
# You should now have a shared library.
|
||||
|
||||
# Long Instructions
|
||||
# -----------------
|
||||
|
||||
# Build *and install* the basic Python 1.5 distribution. See the
|
||||
# Python README for instructions. (This version of Makefile.pre.in
|
||||
# only works with Python 1.5, alpha 3 or newer.)
|
||||
|
||||
# Create a file Setup.in for your extension. This file follows the
|
||||
# format of the Modules/Setup.in file; see the instructions there.
|
||||
# For a simple module called "spam" on file "spammodule.c", it can
|
||||
# contain a single line:
|
||||
# spam spammodule.c
|
||||
# You can build as many modules as you want in the same directory --
|
||||
# just have a separate line for each of them in the Setup.in file.
|
||||
|
||||
# If you want to build your extension as a shared library, insert a
|
||||
# line containing just the string
|
||||
# *shared*
|
||||
# at the top of your Setup.in file.
|
||||
|
||||
# Note that the build process copies Setup.in to Setup, and then works
|
||||
# with Setup. It doesn't overwrite Setup when Setup.in is changed, so
|
||||
# while you're in the process of debugging your Setup.in file, you may
|
||||
# want to edit Setup instead, and copy it back to Setup.in later.
|
||||
# (All this is done so you can distribute your extension easily and
|
||||
# someone else can select the modules they actually want to build by
|
||||
# commenting out lines in the Setup file, without editing the
|
||||
# original. Editing Setup is also used to specify nonstandard
|
||||
# locations for include or library files.)
|
||||
|
||||
# Copy this file (Misc/Makefile.pre.in) to the directory containing
|
||||
# your extension.
|
||||
|
||||
# Run "make -f Makefile.pre.in boot". This creates Makefile
|
||||
# (producing Makefile.pre and sedscript as intermediate files) and
|
||||
# config.c, incorporating the values for sys.prefix, sys.exec_prefix
|
||||
# and sys.version from the installed Python binary. For this to work,
|
||||
# the python binary must be on your path. If this fails, try
|
||||
# make -f Makefile.pre.in Makefile VERSION=1.5 installdir=<prefix>
|
||||
# where <prefix> is the prefix used to install Python for installdir
|
||||
# (and possibly similar for exec_installdir=<exec_prefix>).
|
||||
|
||||
# Note: "make boot" implies "make clobber" -- it assumes that when you
|
||||
# bootstrap you may have changed platforms so it removes all previous
|
||||
# output files.
|
||||
|
||||
# If you are building your extension as a shared library (your
|
||||
# Setup.in file starts with *shared*), run "make" or "make sharedmods"
|
||||
# to build the shared library files. If you are building a statically
|
||||
# linked Python binary (the only solution if your platform doesn't
|
||||
# support shared libraries, and sometimes handy if you want to
|
||||
# distribute or install the resulting Python binary), run "make
|
||||
# python".
|
||||
|
||||
# Note: Each time you edit Makefile.pre.in or Setup, you must run
|
||||
# "make Makefile" before running "make".
|
||||
|
||||
# Hint: if you want to use VPATH, you can start in an empty
|
||||
# subdirectory and say (e.g.):
|
||||
# make -f ../Makefile.pre.in boot srcdir=.. VPATH=..
|
||||
|
||||
|
||||
# === Bootstrap variables (edited through "make boot") ===
|
||||
|
||||
# The prefix used by "make inclinstall libainstall" of core python
|
||||
installdir= /usr/local
|
||||
|
||||
# The exec_prefix used by the same
|
||||
exec_installdir=$(installdir)
|
||||
|
||||
# Source directory and VPATH in case you want to use VPATH.
|
||||
# (You will have to edit these two lines yourself -- there is no
|
||||
# automatic support as the Makefile is not generated by
|
||||
# config.status.)
|
||||
srcdir= .
|
||||
VPATH= .
|
||||
|
||||
# === Variables that you may want to customize (rarely) ===
|
||||
|
||||
# (Static) build target
|
||||
TARGET= python
|
||||
|
||||
# Installed python binary (used only by boot target)
|
||||
PYTHON= python
|
||||
|
||||
# Add more -I and -D options here
|
||||
CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS)
|
||||
|
||||
# These two variables can be set in Setup to merge extensions.
|
||||
# See example[23].
|
||||
BASELIB=
|
||||
BASESETUP=
|
||||
|
||||
# === Variables set by makesetup ===
|
||||
|
||||
MODOBJS= _MODOBJS_
|
||||
MODLIBS= _MODLIBS_
|
||||
|
||||
# === Definitions added by makesetup ===
|
||||
|
||||
# === Variables from configure (through sedscript) ===
|
||||
|
||||
VERSION= @VERSION@
|
||||
CC= @CC@
|
||||
LINKCC= @LINKCC@
|
||||
SGI_ABI= @SGI_ABI@
|
||||
OPT= @OPT@
|
||||
LDFLAGS= @LDFLAGS@
|
||||
DEFS= @DEFS@
|
||||
LIBS= @LIBS@
|
||||
LIBM= @LIBM@
|
||||
LIBC= @LIBC@
|
||||
RANLIB= @RANLIB@
|
||||
MACHDEP= @MACHDEP@
|
||||
SO= @SO@
|
||||
LDSHARED= @LDSHARED@
|
||||
CCSHARED= @CCSHARED@
|
||||
LINKFORSHARED= @LINKFORSHARED@
|
||||
#@SET_CCC@
|
||||
|
||||
# Install prefix for architecture-independent files
|
||||
prefix= /usr/local
|
||||
|
||||
# Install prefix for architecture-dependent files
|
||||
exec_prefix= $(prefix)
|
||||
|
||||
# === Fixed definitions ===
|
||||
|
||||
# Shell used by make (some versions default to the login shell, which is bad)
|
||||
SHELL= /bin/sh
|
||||
|
||||
# Expanded directories
|
||||
BINDIR= $(exec_installdir)/bin
|
||||
LIBDIR= $(exec_prefix)/lib
|
||||
MANDIR= $(installdir)/man
|
||||
INCLUDEDIR= $(installdir)/include
|
||||
SCRIPTDIR= $(prefix)/lib
|
||||
|
||||
# Detailed destination directories
|
||||
BINLIBDEST= $(LIBDIR)/python$(VERSION)
|
||||
LIBDEST= $(SCRIPTDIR)/python$(VERSION)
|
||||
INCLUDEPY= $(INCLUDEDIR)/python$(VERSION)
|
||||
EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION)
|
||||
LIBP= $(exec_installdir)/lib/python$(VERSION)
|
||||
DESTSHARED= $(BINLIBDEST)/site-packages
|
||||
|
||||
LIBPL= $(LIBP)/config
|
||||
|
||||
PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a
|
||||
|
||||
MAKESETUP= $(LIBPL)/makesetup
|
||||
MAKEFILE= $(LIBPL)/Makefile
|
||||
CONFIGC= $(LIBPL)/config.c
|
||||
CONFIGCIN= $(LIBPL)/config.c.in
|
||||
SETUP= $(LIBPL)/Setup
|
||||
|
||||
SYSLIBS= $(LIBM) $(LIBC)
|
||||
|
||||
ADDOBJS= $(LIBPL)/python.o config.o
|
||||
|
||||
# Portable install script (configure doesn't always guess right)
|
||||
INSTALL= $(LIBPL)/install-sh -c
|
||||
# Shared libraries must be installed with executable mode on some systems;
|
||||
# rather than figuring out exactly which, we always give them executable mode.
|
||||
# Also, making them read-only seems to be a good idea...
|
||||
INSTALL_SHARED= ${INSTALL} -m 555
|
||||
|
||||
# === Fixed rules ===
|
||||
|
||||
# Default target. This builds shared libraries only
|
||||
default: sharedmods
|
||||
|
||||
# Build everything
|
||||
all: static sharedmods
|
||||
|
||||
# Build shared libraries from our extension modules
|
||||
sharedmods: $(SHAREDMODS)
|
||||
|
||||
# Build a static Python binary containing our extension modules
|
||||
static: $(TARGET)
|
||||
$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB)
|
||||
$(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \
|
||||
$(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \
|
||||
-o $(TARGET)
|
||||
|
||||
install: sharedmods
|
||||
if test ! -d $(DESTSHARED) ; then \
|
||||
mkdir $(DESTSHARED) ; else true ; fi
|
||||
-for i in X $(SHAREDMODS); do \
|
||||
if test $$i != X; \
|
||||
then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
# Build the library containing our extension modules
|
||||
lib.a: $(MODOBJS)
|
||||
-rm -f lib.a
|
||||
ar cr lib.a $(MODOBJS)
|
||||
-$(RANLIB) lib.a
|
||||
|
||||
# This runs makesetup *twice* to use the BASESETUP definition from Setup
|
||||
config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP)
|
||||
$(MAKESETUP) \
|
||||
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
|
||||
$(MAKE) -f Makefile do-it-again
|
||||
|
||||
# Internal target to run makesetup for the second time
|
||||
do-it-again:
|
||||
$(MAKESETUP) \
|
||||
-m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP)
|
||||
|
||||
# Make config.o from the config.c created by makesetup
|
||||
config.o: config.c
|
||||
$(CC) $(CFLAGS) -c config.c
|
||||
|
||||
# Setup is copied from Setup.in *only* if it doesn't yet exist
|
||||
Setup:
|
||||
cp $(srcdir)/Setup.in Setup
|
||||
|
||||
# Make the intermediate Makefile.pre from Makefile.pre.in
|
||||
Makefile.pre: Makefile.pre.in sedscript
|
||||
sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre
|
||||
|
||||
# Shortcuts to make the sed arguments on one line
|
||||
P=prefix
|
||||
E=exec_prefix
|
||||
H=Generated automatically from Makefile.pre.in by sedscript.
|
||||
L=LINKFORSHARED
|
||||
|
||||
# Make the sed script used to create Makefile.pre from Makefile.pre.in
|
||||
sedscript: $(MAKEFILE)
|
||||
sed -n \
|
||||
-e '1s/.*/1i\\/p' \
|
||||
-e '2s%.*%# $H%p' \
|
||||
-e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \
|
||||
-e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \
|
||||
-e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \
|
||||
-e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \
|
||||
-e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \
|
||||
-e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \
|
||||
-e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \
|
||||
-e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \
|
||||
-e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \
|
||||
-e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \
|
||||
-e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \
|
||||
-e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \
|
||||
-e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \
|
||||
-e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \
|
||||
-e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \
|
||||
-e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \
|
||||
-e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \
|
||||
-e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \
|
||||
$(MAKEFILE) >sedscript
|
||||
echo "/^#@SET_CCC@/d" >>sedscript
|
||||
echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript
|
||||
echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript
|
||||
echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript
|
||||
echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript
|
||||
echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript
|
||||
echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript
|
||||
echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript
|
||||
|
||||
# Bootstrap target
|
||||
boot: clobber
|
||||
VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \
|
||||
installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \
|
||||
exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \
|
||||
$(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \
|
||||
VERSION=$$VERSION \
|
||||
installdir=$$installdir \
|
||||
exec_installdir=$$exec_installdir \
|
||||
Makefile
|
||||
|
||||
# Handy target to remove intermediate files and backups
|
||||
clean:
|
||||
-rm -f *.o *~
|
||||
|
||||
# Handy target to remove everything that is easily regenerated
|
||||
clobber: clean
|
||||
-rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript
|
||||
-rm -f *.so *.sl so_locations
|
||||
|
||||
|
||||
# Handy target to remove everything you don't want to distribute
|
||||
distclean: clobber
|
||||
-rm -f Makefile Setup
|
||||
45
PyLR/Parser.py
Normal file
45
PyLR/Parser.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
__version__ = "$Id$"
|
||||
|
||||
import PyLRengine
|
||||
|
||||
|
||||
class Parser:
|
||||
|
||||
def __init__(self, lexer, actiontable, gototable, prodinfo):
|
||||
self.lexer = lexer
|
||||
self.actions = actiontable
|
||||
self.gotos = gototable
|
||||
# get the function from the function name
|
||||
# if we forgot to supply a function we get an AttributeError here
|
||||
try: self.prodinfo = map(lambda x,s=self: (x[0], getattr(s, x[1]), x[2]),
|
||||
prodinfo)
|
||||
except AttributeError:
|
||||
sys.stderr.write("Parser: error: forgot to supply a parser function\n")
|
||||
raise
|
||||
self.engine = None
|
||||
|
||||
# the unspecified function (the default for all productions)
|
||||
def unspecified(*args):
|
||||
return args[1]
|
||||
|
||||
def initengine(self, dodel=0):
|
||||
self.engine = PyLRengine.NewEngine(self.prodinfo, self.actions, self.gotos)
|
||||
if dodel:
|
||||
self.actions = []
|
||||
self.gotos = []
|
||||
self.prodinfo = []
|
||||
|
||||
def parse(self, text, verbose=0):
|
||||
self.initengine()
|
||||
self.lexer.settext(text)
|
||||
while 1:
|
||||
tok, val = self.lexer.scan(verbose)
|
||||
if not self.engine.parse(tok, val, verbose):
|
||||
break
|
||||
# need to add a method to the engine to
|
||||
# return the final value
|
||||
# and return that here
|
||||
return None
|
||||
|
||||
|
||||
169
PyLR/Parsers/GrammarParser.py
Normal file
169
PyLR/Parsers/GrammarParser.py
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
"""
|
||||
./Parsers/GrammarParser.py -- created Wed Feb 23 15:23:44 2000
|
||||
|
||||
This file was automatically generated by the PyLR parser generator.
|
||||
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
|
||||
tables are used to give functionality to a parsing engine. It also defines
|
||||
A Parser class called GrammarParser which will use this engine. It's usage
|
||||
is indicated in GrammarParser's doc-string.
|
||||
"""
|
||||
#
|
||||
# this section contains source code added by the user
|
||||
# plus 'import PyLR'
|
||||
#
|
||||
|
||||
import PyLR
|
||||
|
||||
#
|
||||
# the action table ('s', 4) means shift to state 4,
|
||||
# ('r', 4) means reduce by production number 4
|
||||
# other entries are errors. each row represents a state
|
||||
# and each column a terminal lookahead symbol (excluding symbols with
|
||||
# Lexer.SKIPTOK).
|
||||
# Lexer symbols are:
|
||||
# ['EOF', 'LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', '', '']
|
||||
#
|
||||
_actiontable = [
|
||||
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)],
|
||||
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)],
|
||||
[('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)],
|
||||
[('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)],
|
||||
[('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)],
|
||||
[('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)],
|
||||
[('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)]
|
||||
]
|
||||
|
||||
|
||||
|
||||
#
|
||||
# the goto table, each row represents a state
|
||||
# and each column, the nonterminal that was on the lhs of the
|
||||
# reduction
|
||||
#
|
||||
_gototable = [
|
||||
[1, 2, 3, 9, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, 4, None, 8, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, 6, 14, None, None, None, None],
|
||||
[None, None, None, None, None, 13, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, 17, 19, 22, 26],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, 21, 22, 26],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None]
|
||||
]
|
||||
|
||||
|
||||
|
||||
#
|
||||
# This is the prodinfo table. each row represents a production
|
||||
# the entries are the length of the production, the name of a method
|
||||
# in an instance of the GrammarParser class below that gets called
|
||||
# when that production occurs, and the index of the lhs in the
|
||||
# nonterminals (as in # the gototable)
|
||||
#
|
||||
_prodinfo = [
|
||||
(1, 'unspecified', 0), # pspec: gspec (unspecified)
|
||||
(2, 'unspecified', 0), # pspec: pydefs gspec (unspecified)
|
||||
(3, 'unspecified', 1), # gspec: 10 lhsdeflist 10 (unspecified)
|
||||
(2, 'unspecified', 2), # pydefs: pydefs pydef (unspecified)
|
||||
(1, 'unspecified', 2), # pydefs: pydef (unspecified)
|
||||
(1, 'lexdef', 3), # pydef: 1 (lexdef)
|
||||
(1, 'addcode', 3), # pydef: 2 (addcode)
|
||||
(1, 'classname', 3), # pydef: 3 (classname)
|
||||
(2, 'unspecified', 4), # lhsdeflist: lhsdeflist lhsdef (unspecified)
|
||||
(1, 'unspecified', 4), # lhsdeflist: lhsdef (unspecified)
|
||||
(4, 'lhsdef', 5), # lhsdef: 4 5 rhslist 6 (lhsdef)
|
||||
(1, 'singletolist', 6), # rhslist: rhs (singletolist)
|
||||
(3, 'rhslist_OR_rhs', 6), # rhslist: rhslist 7 rhs (rhslist_OR_rhs)
|
||||
(1, 'rhs_idlist', 7), # rhs: rhsidlist (rhs_idlist)
|
||||
(4, 'rhs_idlist_func', 7), # rhs: rhsidlist 8 4 9 (rhs_idlist_func)
|
||||
(1, 'unspecified', 8), # rhsidlist: idlist (unspecified)
|
||||
(0, 'rhseps', 8), # rhsidlist: (rhseps)
|
||||
(2, 'idl_idlistID', 9), # idlist: idlist 4 (idl_idlistID)
|
||||
(1, 'idlistID', 9), # idlist: 4 (idlistID)
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
class GrammarParser(PyLR.Parser.Parser):
    """Parser for the PyLR grammar-specification language (generated).

    Productions, with the handler method invoked for each in
    parentheses ('unspecified' entries need no handler):

        pspec: gspec (unspecified);
        pspec: pydefs gspec (unspecified);
        gspec: GDEL lhsdeflist GDEL (unspecified);
        pydefs: pydefs pydef (unspecified);
        pydefs: pydef (unspecified);
        pydef: LEX (lexdef);
        pydef: CODE (addcode);
        pydef: CLASS (classname);
        lhsdeflist: lhsdeflist lhsdef (unspecified);
        lhsdeflist: lhsdef (unspecified);
        lhsdef: ID COLON rhslist SCOLON (lhsdef);
        rhslist: rhs (singletolist);
        rhslist: rhslist OR rhs (rhslist_OR_rhs);
        rhs: rhsidlist (rhs_idlist);
        rhs: rhsidlist LPAREN ID RPAREN (rhs_idlist_func);
        rhsidlist: idlist (unspecified);
        rhsidlist: (rhseps);
        idlist: idlist ID (idl_idlistID);
        idlist: ID (idlistID);

    When a production above is recognized during parsing, the method of
    that name on your subclass is invoked.  Subclass, define the handler
    methods, then parse:

        class MyGrammarParser(GrammarParser):
            # ...define the methods for the productions...

        p = MyGrammarParser()
        p.parse(text)
    """

    def __init__(self):
        # Hook the generated tables and the grammar lexer up to the
        # shared Parser driver.
        PyLR.Parser.Parser.__init__(
            self,
            PyLR.Lexers.GrammarLex(),
            _actiontable,
            _gototable,
            _prodinfo,
        )
|
||||
7
PyLR/Parsers/__init__.py
Normal file
7
PyLR/Parsers/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
"""if you want to make parsers available from this package directly,
|
||||
that is, if you want 'from PyLR.Parsers import RandomParser' to
|
||||
work, import the name here
|
||||
"""
|
||||
|
||||
from GrammarParser import GrammarParser
|
||||
|
||||
170
PyLR/Parsers/gram.py
Normal file
170
PyLR/Parsers/gram.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""
|
||||
out -- created Tue Dec 16 00:30:36 1997
|
||||
|
||||
This file was automatically generated by the PyLR parser generator.
|
||||
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
|
||||
tables are used to give functionality to a parsing engine. It also defines
|
||||
A Parser class called GrammarParser which will use this engine. It's Usage is
|
||||
indicated in GrammarParser's doc-string.
|
||||
"""
|
||||
#
|
||||
# this section contains source code added by the user
|
||||
# plus 'import PyLR'
|
||||
#
|
||||
|
||||
import PyLR.Lexers
|
||||
import PyLR.Parser
|
||||
import PyLR
|
||||
|
||||
#
|
||||
# the action table ('s', 4) means shift to state 4,
|
||||
# ('r', 4) means reduce by production number 4
|
||||
# other entries are errors. each row represents a state
|
||||
# and each column a terminal lookahead symbol (plus EOF)
|
||||
# these symbols are ['LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', 'EOF']
|
||||
#
|
||||
_actiontable = [
|
||||
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)],
|
||||
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)],
|
||||
[('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)],
|
||||
[('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)],
|
||||
[('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)],
|
||||
[('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)],
|
||||
[('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)]
|
||||
]
|
||||
|
||||
|
||||
|
||||
#
|
||||
# the goto table, each row represents a state
|
||||
# and each column, the nonterminal that was on the lhs of the
|
||||
# reduction
|
||||
#
|
||||
_gototable = [
|
||||
[1, 2, 3, 9, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, 4, None, 8, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, 6, 14, None, None, None, None],
|
||||
[None, None, None, None, None, 13, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, 17, 19, 22, 26],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, 21, 22, 26],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None]
|
||||
]
|
||||
|
||||
|
||||
|
||||
#
|
||||
# This is the prodinfo table. each row represents a production
|
||||
# the entries are the length of the production, the name of a method
|
||||
# in an instance of the GrammarParser class below that gets called
|
||||
# when that production occurs, and the index of the lhs in the
|
||||
# nonterminals (as in # the gototable)
|
||||
#
|
||||
_prodinfo = [
|
||||
(1, 'unspecified', 0), # pspec -> ['gspec']
|
||||
(2, 'unspecified', 0), # pspec -> ['pydefs', 'gspec']
|
||||
(3, 'unspecified', 1), # gspec -> ['GDEL', 'lhsdeflist', 'GDEL']
|
||||
(2, 'unspecified', 2), # pydefs -> ['pydefs', 'pydef']
|
||||
(1, 'unspecified', 2), # pydefs -> ['pydef']
|
||||
(1, 'lexdef', 3), # pydef -> ['LEX']
|
||||
(1, 'addcode', 3), # pydef -> ['CODE']
|
||||
(1, 'classname', 3), # pydef -> ['CLASS']
|
||||
(2, 'unspecified', 4), # lhsdeflist -> ['lhsdeflist', 'lhsdef']
|
||||
(1, 'unspecified', 4), # lhsdeflist -> ['lhsdef']
|
||||
(4, 'lhsdef', 5), # lhsdef -> ['ID', 'COLON', 'rhslist', 'SCOLON']
|
||||
(1, 'singletolist', 6), # rhslist -> ['rhs']
|
||||
(3, 'rhslist_OR_rhs', 6), # rhslist -> ['rhslist', 'OR', 'rhs']
|
||||
(1, 'rhs_idlist', 7), # rhs -> ['rhsidlist']
|
||||
(4, 'rhs_idlist_func', 7), # rhs -> ['rhsidlist', 'LPAREN', 'ID', 'RPAREN']
|
||||
(1, 'unspecified', 8), # rhsidlist -> ['idlist']
|
||||
(0, 'rhseps', 8), # rhsidlist -> []
|
||||
(2, 'idl_idlistID', 9), # idlist -> ['idlist', 'ID']
|
||||
(1, 'idlistID', 9), # idlist -> ['ID']
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
class GrammarParser (PyLR.Parser.Parser):
    """LR parser for PyLR grammar-specification files.

    Produced automatically by the PyLR parser generator.  Subclass it
    and implement the handler methods shown in parentheses below; names
    marked 'unspecified' should be ignored.

        pspec      -> gspec                          (unspecified)
                    | pydefs gspec ;                 (unspecified)
        gspec      -> GDEL lhsdeflist GDEL ;         (unspecified)
        pydefs     -> pydefs pydef                   (unspecified)
                    | pydef ;                        (unspecified)
        pydef      -> LEX                            (lexdef)
                    | CODE                           (addcode)
                    | CLASS ;                        (classname)
        lhsdeflist -> lhsdeflist lhsdef              (unspecified)
                    | lhsdef ;                       (unspecified)
        lhsdef     -> ID COLON rhslist SCOLON ;      (lhsdef)
        rhslist    -> rhs                            (singletolist)
                    | rhslist OR rhs ;               (rhslist_OR_rhs)
        rhs        -> rhsidlist                      (rhs_idlist)
                    | rhsidlist LPAREN ID RPAREN ;   (rhs_idlist_func)
        rhsidlist  -> idlist                         (unspecified)
                    | ;                              (rhseps)
        idlist     -> idlist ID                      (idl_idlistID)
                    | ID ;                           (idlistID)

    Usage:

        class MyGrammarParser(GrammarParser):
            # ...define the methods for the productions...

        p = MyGrammarParser(); p.parse(text)
    """

    def __init__(self):
        # Wire the grammar lexer plus the generated action/goto/production
        # tables into the generic LR engine.
        PyLR.Parser.Parser.__init__(self, PyLR.Lexers.GrammarLex(),
                                    _actiontable, _gototable, _prodinfo)
|
||||
81
PyLR/PyLRengine.h
Normal file
81
PyLR/PyLRengine.h
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#ifndef Py_PYLRENGINE_H
#define Py_PYLRENGINE_H
#ifdef __cplusplus
extern "C" {
#endif

/* Sentinel token value marking an empty slot in the input buffer. */
#define EOBUF -1

/* One buffered input item: a token number and its semantic value. */
struct inbufdatum {
    PyObject* pylrval;   /* semantic value supplied by the lexer */
    int tok;             /* token number, or EOBUF if unused */
};

/* One fixed-size chunk of the input buffer; chunks form a singly
   linked list so the buffer can grow without reallocating. */
struct inbufdata {
    struct inbufdatum** chunk;   /* array of chunksize datum pointers */
    struct inbufdata* next;      /* next chunk, or NULL */
};

/* The parser's input buffer: a chunked queue of (token, value) pairs. */
typedef struct inbuf_struct {
    struct inbufdata* data;   /* head chunk */
    int bi;                   /* read cursor (absolute index) */
    int nextinput;            /* write cursor (absolute index) */
    int chunksize;            /* items per chunk */
} inbuftype;

/* One LR stack entry: state, token, and semantic value. */
struct stackdatum {
    int state;
    int tok;
    PyObject* pylrval;
};

/* One fixed-size chunk of the LR stack (linked, newest chunk first). */
struct stackdata {
    struct stackdatum** bucket;   /* array of chunksize entry pointers */
    struct stackdata* next;       /* older chunk, or NULL */
};

/* The LR parse stack. */
typedef struct stack_struct {
    struct stackdata* data;   /* top chunk */
    int si;                   /* number of entries on the stack */
    int chunksize;            /* entries per chunk */
} stacktype;

/* Per-production metadata: RHS length, Python reduce callback, and the
   LHS nonterminal's column in the goto table. */
typedef struct prodinfo_struct {
    int len;
    PyObject* func;
    int lhsi;
} prodinfo_type;

/* One action-table cell: the action kind and its argument
   (target state for SHIFT, production number for REDUCE). */
typedef struct actionstruct{
    int arg;
    short act;
} actiontype;

/***********************************************************************
 * the possible values of the action table
 ***********************************************************************/

#define SHIFT 's'
#define REDUCE 'r'
#define ACCEPT 'a'

/* The Python-visible parser object wrapping the engine state. */
typedef struct {
    PyObject_HEAD
    inbuftype* inbuf;            /* pending input tokens */
    stacktype* stack;            /* LR stack */
    prodinfo_type** prodinfo;    /* production table */
    int prodinfosize;            /* number of productions */
    int** gototable;             /* goto table [state][nonterminal] */
    int goto_x;                  /* goto table row count */
    int goto_y;                  /* goto table column count */
    actiontype*** actiontable;   /* action table [state][token] */
    int act_x;                   /* action table row count */
    int act_y;                   /* action table column count */
    int toksadded;               /* tokens accepted so far (lookahead priming) */
} parserobject;


#ifdef __cplusplus
}
#endif
#endif /* !Py_PYLRENGINE_H */
|
||||
717
PyLR/PyLRenginemodule.c
Normal file
717
PyLR/PyLRenginemodule.c
Normal file
|
|
@ -0,0 +1,717 @@
|
|||
/***********************************************************************
 * This file defines an LR parse engine.  It references a parsing table
 * that is defined in Python.
 *
 * It defines a new type object in Python, called a Parser.  Its
 * methods are .parse(token, value), .showstack(), .showbuf(),
 * .showgotos() and .showacts().
 *
 * $Id$
 *
 ***********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "Python.h"
|
||||
#include "PyLRengine.h"
|
||||
|
||||
/***********************************************************************
|
||||
* PyLRengine Error things
|
||||
***********************************************************************/
|
||||
/* Module-level exception raised for parse-time failures; created and
   exported in initPyLRengine(). */
static PyObject* PyLRParseError;

/* Allocate a placeholder object or bail with MemoryError.
   NOTE(review): allocating a PyObject with raw malloc is never a valid
   Python object; kept only for source compatibility — new code should
   not use this macro. */
#define CHECK_MALLOC(obj) \
    if (!(obj = (PyObject *) malloc (sizeof(PyObject)))) { \
        PyErr_SetString(PyExc_MemoryError, "no more memory"); \
        return NULL; \
    }

/* Set the module exception and return NULL from the calling function.
   Fixed: the original referenced PyExc_ParseError, which does not exist
   in the Python C API and could never compile/link; the module's own
   PyLRParseError is the intended exception. */
#define onError(message) \
    { PyErr_SetString(PyLRParseError, message); return NULL; }
|
||||
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* The engines input buffer. has a chunksize controllable from within
|
||||
* python. functions are initinbufdata, init_inbuf, incbi, add2buf,
|
||||
* gettoken, petpylrval, dumpinbuf
|
||||
***********************************************************************/
|
||||
|
||||
/* Allocate and zero one input-buffer chunk of `chunksize` slots.
   Every slot's token is set to EOBUF and its value to NULL.
   NOTE(review): error handling is inconsistent — the first two failures
   printf+exit, the per-slot failure uses onError (returns NULL). */
static struct inbufdata * init_inbufdata(chunksize)
    int chunksize;
{
    struct inbufdata * ibd;
    int i;

    if ((ibd = (struct inbufdata *) malloc(sizeof(struct inbufdata))) == NULL) {
        printf("No more Memory!\n");
        exit(1);
    }
    if ((ibd->chunk = (struct inbufdatum **) malloc(sizeof(struct inbufdatum *) * chunksize)) == NULL) {
        printf("No more Memory!\n");
        exit(1);
    }
    for (i=0; i<chunksize; i++) {
        if ((ibd->chunk[i] = (struct inbufdatum *) malloc(sizeof(struct inbufdatum))) == NULL) {
            onError("Memory");
        }
        /* mark the slot empty */
        ibd->chunk[i]->tok = EOBUF;
        ibd->chunk[i]->pylrval = NULL;
    }
    ibd->next = NULL;
    return ibd;
}
|
||||
|
||||
/* Allocate the input buffer with one initial chunk; both the read
   cursor (bi) and write cursor (nextinput) start at zero. */
static inbuftype * init_inbuf(chunksize)
    int chunksize;
{
    inbuftype * ib;
    if ((ib = (inbuftype *)malloc(sizeof(inbuftype))) == NULL) {
        printf("No more Memory!\n");
        exit(1);
    }
    ib->bi = 0;
    ib->data = init_inbufdata(chunksize);
    ib->chunksize = chunksize;
    ib->nextinput = 0;
    return ib;
}
|
||||
|
||||
/* Advance the read cursor.  When the cursor crosses a chunk boundary
   the fully consumed head chunk is unlinked and freed.
   NOTE(review): only the chunk header is freed — the chunk array and
   its inbufdatum structs appear to leak; confirm intent. */
static void incbi(inbuf)
    inbuftype * inbuf;
{
    struct inbufdata * tmpdata;
    if ((! ((inbuf->bi + 1) % inbuf->chunksize)) && (inbuf->bi != 0)) {
        tmpdata = inbuf->data->next;
        free(inbuf->data);
        inbuf->data = tmpdata;
    }
    inbuf->bi++;
}
|
||||
|
||||
/* Append (tok, pylrval) at the write cursor, growing the chunk list
   when the current tail chunk is full.  The head pointer is saved and
   restored so walking to the tail doesn't disturb the reader. */
static void add2buf(inbuf, tok, pylrval)
    inbuftype * inbuf; int tok; PyObject * pylrval;
{
    struct inbufdata * orgibd = inbuf->data;
    struct inbufdata * newibd;
    while(inbuf->data->next != NULL)
        inbuf->data = inbuf->data->next;
    if ((! (inbuf->nextinput % inbuf->chunksize)) && (inbuf->nextinput != 0)) { /* make new chunk at end */
        newibd = init_inbufdata(inbuf->chunksize);
        newibd->chunk[0]->tok = tok;
        newibd->chunk[0]->pylrval = pylrval;
        inbuf->data->next = newibd;
    } else {
        inbuf->data->chunk[(inbuf->nextinput % inbuf->chunksize)]->tok = tok;
        inbuf->data->chunk[(inbuf->nextinput % inbuf->chunksize)]->pylrval = pylrval;
    }
    inbuf->nextinput++;
    inbuf->data = orgibd;   /* restore head */
}
|
||||
|
||||
|
||||
/* Token / semantic value at the read cursor (bi) of the head chunk. */
#define gettoken(ib) ((ib)->data->chunk[ (ib)->bi % (ib)->chunksize]->tok)
#define getpylrval(ib) ((ib)->data->chunk[ (ib)->bi % (ib)->chunksize]->pylrval)
|
||||
|
||||
/* Debug helper: print every chunk and slot of the input buffer to
   stdout.  The head pointer is saved and restored around the walk. */
static void dumpinbuf(inbuf)

    inbuftype* inbuf;
{
    int i, j;
    struct inbufdata * orgibd = inbuf->data;
    printf ("inbuf at %p with bi at %d and chunksize of %d and nextinput at %d:\n", inbuf, \
            inbuf->bi, inbuf->chunksize, inbuf->nextinput);
    j = 0;
    for (inbuf->data; inbuf->data != NULL; inbuf->data = inbuf->data->next) {
        printf("\tchunk %d:\n", j);
        for (i=0; i < inbuf->chunksize; i++) {
            printf("\t\tchunk[%d]->tok = %d; pylrval at %p\n",
                   i,
                   inbuf->data->chunk[i]->tok,
                   inbuf->data->chunk[i]->pylrval);
        }
        j++;
    }
    inbuf->data = orgibd;
}
|
||||
|
||||
/***********************************************************************
|
||||
* the Stack
|
||||
***********************************************************************/
|
||||
|
||||
static stacktype * init_stack (stackchunksize)
|
||||
int stackchunksize;
|
||||
{
|
||||
stacktype * newstack;
|
||||
if (( newstack = (stacktype *) malloc(sizeof(stacktype))) == NULL) {
|
||||
PyErr_SetString(PyLRengineError, "Memory Error");
|
||||
return NULL;
|
||||
}
|
||||
newstack->si = 0;
|
||||
newstack->data = NULL;
|
||||
newstack->chunksize = stackchunksize;
|
||||
return newstack;
|
||||
}
|
||||
|
||||
|
||||
static struct stackdata * init_stackdata (stackchunksize)
|
||||
int stackchunksize;
|
||||
{
|
||||
struct stackdata * newstackdata;
|
||||
int i;
|
||||
|
||||
if ((newstackdata = (struct stackdata *) malloc (sizeof (struct stackdata))) == NULL) {
|
||||
PyErr_SetString(PyLRengineError, "Memory Error");
|
||||
return NULL;
|
||||
}
|
||||
if ((newstackdata->bucket = (struct stackdatum **) malloc (sizeof (struct stackdatum *) * stackchunksize)) == NULL) {
|
||||
PyErr_SetString(PyLRengineError, "Memory Error");
|
||||
return NULL;
|
||||
}
|
||||
for (i=0; i < stackchunksize; i++) {
|
||||
if ((newstackdata->bucket[i] = (struct stackdatum *) malloc(sizeof (struct stackdatum))) == NULL) {
|
||||
onError("Memory Error");
|
||||
}
|
||||
newstackdata->bucket[i]->state = -1;
|
||||
newstackdata->bucket[i]->tok = -1;
|
||||
newstackdata->bucket[i]->pylrval = NULL;
|
||||
}
|
||||
newstackdata->next = NULL;
|
||||
return newstackdata;
|
||||
}
|
||||
|
||||
|
||||
/* Push (token, state, pylrval) on the LR stack, allocating a new chunk
   when the top chunk is full.  Takes a new reference to pylrval. */
static void push (stack, token, state, pylrval)
    stacktype * stack;
    int token;
    int state;
    PyObject * pylrval;
{
    struct stackdata *newstackdata;
    if (! (stack->si % stack->chunksize)) {
        /* top chunk full (or stack empty): prepend a fresh chunk */
        newstackdata = init_stackdata(stack->chunksize);
        newstackdata->bucket[0]->tok = token;
        newstackdata->bucket[0]->state = state;
        newstackdata->bucket[0]->pylrval = pylrval;
        newstackdata->next = stack->data;
        stack->data = newstackdata;
    } else {
        stack->data->bucket[stack->si % stack->chunksize]->tok = token;
        stack->data->bucket[stack->si % stack->chunksize]->state = state;
        stack->data->bucket[stack->si % stack->chunksize]->pylrval = pylrval;
    }
    Py_XINCREF(pylrval);   /* stack owns a reference */
    stack->si++;
}
|
||||
|
||||
/* Debug helper: print every chunk and bucket of the LR stack to
   stdout.  The top-chunk pointer is saved and restored. */
static void show_stack(stack)
    struct stack_struct * stack;
{
    struct stackdata * orgstackdata;
    int i;
    orgstackdata = stack->data;
    printf("stack at %p:\n", stack);
    for (stack->data; stack->data != NULL; stack->data = stack->data->next) {
        printf("stack->data at %p\n", stack->data);
        for (i=0; i<stack->chunksize; i++) {
            printf ("stack->data->bucket[%d] = (%d, %d, %p)\n",
                    i,
                    stack->data->bucket[i]->tok,
                    stack->data->bucket[i]->state,
                    stack->data->bucket[i]->pylrval);
        }
    }
    stack->data = orgstackdata;
}
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* This function returns the python objects stored on the stack so that
|
||||
* they can then be passed to the appropriate function (popping the stack
|
||||
* only occurs when a reduce operation is called, so the python objects
|
||||
* returned get passed to the function associated with the production that
|
||||
* is associated with popping items from the stack. see the method parser_parse
|
||||
* for how this works in more detail
|
||||
***********************************************************************/
|
||||
|
||||
static PyObject ** pop(stack, amt)
|
||||
stacktype * stack;
|
||||
int amt;
|
||||
{
|
||||
struct stackdata * tmpsd;
|
||||
PyObject ** popped_pylrvals;
|
||||
int c = 0;
|
||||
if (amt == 0)
|
||||
return NULL;
|
||||
if ((popped_pylrvals = (PyObject **)malloc(sizeof(PyObject *) * amt)) == NULL)
|
||||
onError("Memory Error");
|
||||
if (stack->si < amt) {
|
||||
PyErr_SetString(PyLRengineError, "popping too much from stack!!!");
|
||||
return 0;
|
||||
}
|
||||
while (amt > 0 && stack->si >= 0) {
|
||||
if ((popped_pylrvals[c] = (PyObject *)malloc(sizeof(PyObject))) == NULL)
|
||||
onError("Memory Error");
|
||||
if ((stack->si - 1) % stack->chunksize) {
|
||||
stack->data->bucket[(stack->si -1) % stack->chunksize]->tok = -1;
|
||||
stack->data->bucket[(stack->si -1) % stack->chunksize]->state = -1;
|
||||
popped_pylrvals[c] = stack->data->bucket[(stack->si -1) % stack->chunksize]->pylrval;
|
||||
stack->data->bucket[(stack->si -1) % stack->chunksize]->pylrval = NULL;
|
||||
} else {
|
||||
stack->data->bucket[0]->tok = -1;
|
||||
stack->data->bucket[0]->state = -1;
|
||||
popped_pylrvals[c] = stack->data->bucket[0]->pylrval;
|
||||
stack->data->bucket[0]->pylrval = NULL;
|
||||
tmpsd = stack->data->next;
|
||||
free(stack->data);
|
||||
stack->data = tmpsd;
|
||||
}
|
||||
amt--; stack->si--; c++; /* not quite ;) */
|
||||
}
|
||||
return popped_pylrvals;
|
||||
}
|
||||
|
||||
/* Current LR state: the state stored in the top stack entry, or 0
   (the start state) when the stack is empty. */
#define stackstate(stack) \
    (((stack)->data == NULL)?\
     0:\
     (stack)->data->bucket[((stack)->si - 1) % (stack)->chunksize]->state)
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* Production Info related functions
|
||||
***********************************************************************/
|
||||
|
||||
static prodinfo_type ** Py_prodinfo2prodinfo (parserobj, py_prodinfo)
|
||||
parserobject * parserobj;
|
||||
PyObject * py_prodinfo;
|
||||
{
|
||||
prodinfo_type ** prodinfo;
|
||||
PyObject * prodtuple;
|
||||
int listsize;
|
||||
register int listi;
|
||||
listsize = PyList_Size(py_prodinfo);
|
||||
if (listsize == -1)
|
||||
onError("production info table is not a list!");
|
||||
parserobj->prodinfosize = listsize;
|
||||
if ((prodinfo = (prodinfo_type **) malloc (sizeof (prodinfo_type *) * listsize)) == NULL)
|
||||
onError("No more Mem!");
|
||||
for (listi=0; listi < listsize; listi++) {
|
||||
if ((prodinfo[listi] = (prodinfo_type *) malloc (sizeof(prodinfo_type))) == NULL)
|
||||
onError("Memory");
|
||||
prodtuple = PyList_GetItem(py_prodinfo, listi);
|
||||
if (! PyTuple_Check(prodtuple))
|
||||
onError("Corrput Prodinfo table, must contain tuples of (len, callable)");
|
||||
prodinfo[listi]->len = (short int) PyInt_AsLong(PyTuple_GetItem(prodtuple, 0));
|
||||
if ((prodinfo[listi]->func = (PyObject *) malloc (sizeof(PyObject))) == NULL)
|
||||
onError("Memory");
|
||||
prodinfo[listi]->func = PyTuple_GetItem(prodtuple, 1);
|
||||
prodinfo[listi]->lhsi = (int) PyInt_AsLong(PyTuple_GetItem(prodtuple, 2));
|
||||
if ((! PyCallable_Check(prodinfo[listi]->func)) && (prodinfo[listi]->func != Py_None))
|
||||
onError("corrupt prodinfo data, must contain tuples of (len, callable)");
|
||||
Py_XINCREF(prodinfo[listi]->func);
|
||||
}
|
||||
return prodinfo;
|
||||
}
|
||||
|
||||
static PyObject * prodinfo2Py_prodinfo(prodinfo, sz)
|
||||
prodinfo_type ** prodinfo;
|
||||
int sz;
|
||||
{
|
||||
int i;
|
||||
PyObject * list;
|
||||
PyObject * tuple;
|
||||
PyObject * len;
|
||||
PyObject * func;
|
||||
PyObject * lhsi;
|
||||
list = PyList_New(sz);
|
||||
for (i=0; i<sz; i++) {
|
||||
tuple = PyTuple_New(3);
|
||||
len = Py_BuildValue("i", prodinfo[i]->len);
|
||||
lhsi = Py_BuildValue("i", prodinfo[i]->lhsi);
|
||||
func = prodinfo[i]->func;
|
||||
PyTuple_SetItem(tuple, 0, len);
|
||||
PyTuple_SetItem(tuple, 1, func);
|
||||
PyTuple_SetItem(tuple, 2, lhsi);
|
||||
PyList_SetItem(list, i, tuple);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* the goto table, show and set routines
|
||||
***********************************************************************/
|
||||
|
||||
#define GOTOERR -1
|
||||
|
||||
static void * mkgototable(parser, pygotos)
|
||||
parserobject * parser;
|
||||
PyObject * pygotos;
|
||||
{
|
||||
register int outerlen;
|
||||
register int outerct;
|
||||
register int innerlen;
|
||||
register int innerct;
|
||||
int ** gotos;
|
||||
PyObject * innerlist;
|
||||
PyObject * py_entry;
|
||||
outerlen = PyList_Size(pygotos);
|
||||
parser->goto_x = 0;
|
||||
parser->goto_y = 0;
|
||||
parser->gototable = NULL;
|
||||
if (outerlen == -1)
|
||||
onError("goto table must be a list of lists!");
|
||||
if ((gotos = (int **) malloc(sizeof(int *) * outerlen)) == NULL)
|
||||
onError("Memory Error");
|
||||
for (outerct = 0; outerct < outerlen; outerct++) {
|
||||
innerlist = PyList_GetItem(pygotos, outerct);
|
||||
innerlen = PyList_Size(innerlist);
|
||||
if (innerlen == -1)
|
||||
onError ("goto table must be a list of lists!");
|
||||
if ((gotos[outerct] = (int *) malloc (sizeof(int) * innerlen)) == NULL)
|
||||
onError("Memory Error");
|
||||
for (innerct = 0; innerct < innerlen; innerct++) {
|
||||
py_entry = PyList_GetItem(innerlist, innerct);
|
||||
if ((! PyInt_Check( py_entry)) && (py_entry != Py_None))
|
||||
onError("goto table must be a list of list of either ints or None!");
|
||||
if (py_entry == Py_None) {
|
||||
gotos[outerct][innerct] = GOTOERR;
|
||||
}
|
||||
else {
|
||||
gotos[outerct][innerct] = (int) PyInt_AsLong(py_entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
parser->goto_x = outerlen;
|
||||
parser->goto_y = innerlen;
|
||||
parser->gototable = gotos;
|
||||
}
|
||||
|
||||
|
||||
/* Python method .showgotos(): print the goto matrix to stdout and
   return None.  `args` is unused. */
static PyObject * show_gotos(self, args)
    parserobject * self;
    PyObject * args;
{
    register int x;
    register int y;
    for (x=0; x < self->goto_x; x++) {
        for (y=0; y < self->goto_y; y++) {
            printf("%d ", self->gototable[x][y]);
        }
        printf ("\n");
    }
    Py_INCREF(Py_None);
    return Py_None;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* Action Table set and show
|
||||
***********************************************************************/
|
||||
#define ACTERR -1
|
||||
|
||||
static void * mkactiontable(parser, pyactions)
|
||||
parserobject * parser; PyObject * pyactions;
|
||||
{
|
||||
register int outerlen;
|
||||
register int outerct;
|
||||
register int innerlen;
|
||||
register int innerct;
|
||||
actiontype *** actions;
|
||||
PyObject * innerlist;
|
||||
PyObject * py_tuple;
|
||||
PyObject * py_act;
|
||||
char * cact;
|
||||
PyObject * py_arg;
|
||||
int tuplelen;
|
||||
parser->act_x = 0;
|
||||
parser->act_y = 0;
|
||||
parser->actiontable = NULL;
|
||||
outerlen = PyList_Size(pyactions);
|
||||
if (outerlen == -1)
|
||||
onError("goto table must be a list of lists!");
|
||||
if ((actions = (actiontype ***) malloc(sizeof(actiontype *) * outerlen)) == NULL)
|
||||
onError("Memory Error");
|
||||
for (outerct = 0; outerct < outerlen; outerct++) {
|
||||
innerlist = PyList_GetItem(pyactions, outerct);
|
||||
innerlen = PyList_Size(innerlist);
|
||||
if (innerlen == -1)
|
||||
onError ("goto table must be a list of lists!");
|
||||
if ((actions[outerct] = (actiontype **) malloc (sizeof(actiontype *) * innerlen)) == NULL)
|
||||
onError("Memory Error");
|
||||
for (innerct = 0; innerct < innerlen; innerct++) {
|
||||
if ((actions[outerct][innerct] = (actiontype *) malloc(sizeof(actiontype))) == NULL)
|
||||
onError("Memory Error");
|
||||
py_tuple = PyList_GetItem(innerlist, innerct);
|
||||
if (! PyTuple_Check(py_tuple))
|
||||
onError("goto table must be a list of list of tuples!");
|
||||
tuplelen = PyTuple_Size(py_tuple);
|
||||
if (tuplelen != 2)
|
||||
onError("goto table must contain entries of tuples of length 2");
|
||||
py_act = PyTuple_GetItem(py_tuple, 0);
|
||||
py_arg = PyTuple_GetItem(py_tuple, 1);
|
||||
if ((! PyString_Check(py_act)) || (! PyInt_Check(py_arg)))
|
||||
onError("goto table's entries must be tuples of type string, int");
|
||||
actions[outerct][innerct]->act = (short) *(PyString_AsString(py_act));
|
||||
actions[outerct][innerct]->arg = (int) PyInt_AsLong(py_arg);
|
||||
}
|
||||
}
|
||||
parser->act_x = outerlen;
|
||||
parser->act_y = innerlen;
|
||||
parser->actiontable = actions;
|
||||
}
|
||||
|
||||
|
||||
/* Python method .showacts(): print the action matrix to stdout and
   return None.  `args` is unused. */
static PyObject * show_actions(self, args)
    parserobject * self;
    PyObject * args;
{
    register int x;
    register int y;
    for (x=0; x < self->act_x; x++) {
        for (y=0; y < self->act_y; y++) {
            printf("(%c, %d), ", self->actiontable[x][y]->act, self->actiontable[x][y]->arg);
        }
        printf ("\n");
    }
    Py_INCREF(Py_None);
    return Py_None;
}
|
||||
|
||||
/***********************************************************************
|
||||
* Parser Type Info and internal routines
|
||||
***********************************************************************/
|
||||
|
||||
|
||||
/* Forward declaration of the Parser type object (defined below). */
staticforward PyTypeObject ParserType;

/* True iff v is an instance of ParserType. */
#define is_parserobject(v) ((v)->ob_type == &ParserType)
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* Parser Methods
|
||||
***********************************************************************/
|
||||
|
||||
static PyObject *
|
||||
parser_parse(self, args)
|
||||
parserobject * self;
|
||||
PyObject * args;
|
||||
{
|
||||
int tok, curstate, i, tuple_i;
|
||||
PyObject * pylrval;
|
||||
PyObject * fargs;
|
||||
PyObject * fres;
|
||||
actiontype * act;
|
||||
PyObject ** pylrvals;
|
||||
if (! PyArg_ParseTuple(args, "iO", &tok, &pylrval)) {
|
||||
return NULL;
|
||||
}
|
||||
Py_XINCREF(pylrval);
|
||||
add2buf(self->inbuf, tok, pylrval);
|
||||
if ( self->toksadded < 1) {
|
||||
self->toksadded++;
|
||||
return Py_BuildValue("i", 1);
|
||||
}
|
||||
if ((stackstate(self->stack) < 0) || (gettoken(self->inbuf) < 0))
|
||||
onError("PyLRTableIndexError");
|
||||
act = self->actiontable[stackstate(self->stack)][gettoken(self->inbuf)];
|
||||
if (act == NULL) {
|
||||
onError("PyLRTableError, couldn't retrieve action");
|
||||
}
|
||||
if (act->act == SHIFT) {
|
||||
push(self->stack, gettoken(self->inbuf), act->arg, getpylrval(self->inbuf));
|
||||
incbi(self->inbuf);
|
||||
return Py_BuildValue("i", 1);
|
||||
} else if (act->act == REDUCE) {
|
||||
pylrvals = pop(self->stack, self->prodinfo[act->arg - 1]->len);
|
||||
if (PyErr_Occurred()) { return NULL; }
|
||||
curstate = stackstate(self->stack);
|
||||
fargs = PyTuple_New(self->prodinfo[act->arg - 1]->len);
|
||||
for (i=0; i < self->prodinfo[act->arg - 1]->len ; i++) {
|
||||
tuple_i = ((self->prodinfo[act->arg -1]->len - i) -1);
|
||||
PyTuple_SetItem(fargs, tuple_i, pylrvals[i]);
|
||||
}
|
||||
fres = PyObject_CallObject(self->prodinfo[act->arg - 1]->func, fargs);
|
||||
if (PyErr_Occurred())
|
||||
return NULL;
|
||||
Py_XINCREF(fres);
|
||||
/* Py_DECREF(fargs);*/
|
||||
push(self->stack, act->arg, self->gototable[curstate][self->prodinfo[act->arg - 1]->lhsi], fres);
|
||||
return Py_BuildValue("i", 1);
|
||||
} else if (act->act == ACCEPT) {
|
||||
return Py_BuildValue("i", 0);
|
||||
} else {
|
||||
PyErr_SetString(PyLRengineError, "SyntaxError while parsing");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Python method .showstack(): dump the LR stack to stdout, return None. */
static PyObject *
parser_show_stack(self, args)
    parserobject * self;
    PyObject * args;
{
    if (! PyArg_ParseTuple(args, ""))
        return NULL;
    show_stack(self->stack);
    Py_XINCREF(Py_None);
    return Py_None;
}
|
||||
|
||||
/* Python method .showbuf(): dump the input buffer to stdout, return None. */
static PyObject *
parser_show_inbuf(self, args)
    parserobject * self;
    PyObject * args;
{
    if (! PyArg_ParseTuple(args, ""))
        return NULL;
    dumpinbuf(self->inbuf);
    Py_XINCREF(Py_None);
    return Py_None;
}
|
||||
|
||||
|
||||
/* Method table of the Parser type; the flag 1 is METH_VARARGS. */
static struct PyMethodDef Parser_methods[] = {
    { "parse", parser_parse, 1},
    { "showstack", parser_show_stack, 1},
    { "showbuf", parser_show_inbuf, 1},
    { "showgotos", show_gotos, 1},
    { "showacts", show_actions, 1},
    { NULL, NULL}, /* sentinel */
};
|
||||
|
||||
/***********************************************************************
|
||||
* Basic type operations for ParserType
|
||||
***********************************************************************/
|
||||
|
||||
/* Allocate a parser object and build all engine tables from the three
   Python lists.  Returns NULL with an exception set on any failure.
   NOTE(review): errors from the individual init/mk* calls are only
   checked once at the end via PyErr_Occurred; on failure the partially
   constructed object is not freed — confirm acceptable. */
static parserobject *
newparserobject (pyprodinfo, pyactions, pygotos, bufchunksize, stackchunksize)
    PyObject * pyprodinfo;
    PyObject * pyactions;
    PyObject * pygotos;
    int bufchunksize;
    int stackchunksize;
{
    parserobject *p;
    p = PyObject_NEW(parserobject, &ParserType);
    if (p == NULL)
        onError("memory in init obj...");
    p->stack = init_stack(stackchunksize);
    p->inbuf = init_inbuf(bufchunksize);
    mkgototable(p, pygotos);
    mkactiontable(p, pyactions);
    p->prodinfo = Py_prodinfo2prodinfo(p, pyprodinfo);
    p->toksadded = 0;
    if (PyErr_Occurred())
        return NULL;
    return p;
}
|
||||
|
||||
/* Destructor for parser objects.
   NOTE(review): only the object shell is released — the stack, input
   buffer, and the three tables appear to leak; confirm intent. */
static void
parser_dealloc(self)
    parserobject *self;
{
    PyMem_DEL(self);
}
|
||||
|
||||
/* tp_print slot: write a short identification of the object to fp. */
static int
parser_print(self, fp, flags)
    parserobject * self;
    FILE * fp;
    int flags;
{
    fprintf(fp, "<PyLRengine Object at %p>\n", self);
    return 0;
}
|
||||
|
||||
|
||||
/* tp_getattr slot: expose the read-only attributes `state`,
   `stacksize` and `prodinfo`, falling back to the method table. */
static PyObject *
parser_getattr(self, name)
    parserobject * self;
    char * name;
{
    if (strcmp(name, "state") == 0)
        return Py_BuildValue("i", stackstate(self->stack));
    if (strcmp(name, "stacksize") == 0)
        return Py_BuildValue("i", (self->stack->si));
    if (strcmp(name, "prodinfo") == 0)
        return prodinfo2Py_prodinfo(self->prodinfo, self->prodinfosize);
    if (strcmp(name, "__members__") == 0)
        return Py_BuildValue("[sss]", "state", "stacksize", "prodinfo");
    else
        return Py_FindMethod(Parser_methods, (PyObject *) self, name);
}
|
||||
|
||||
|
||||
/* The Parser type object; remaining slots default to zero. */
static PyTypeObject ParserType = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,
    "NewEngine",			/* type name */
    sizeof(parserobject),	/* basic size */
    0,				/* itemsize */
    (destructor) parser_dealloc,
    (printfunc) parser_print,
    (getattrfunc) parser_getattr
};
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* Module Logic
|
||||
***********************************************************************/
|
||||
|
||||
static PyObject *
|
||||
parsernew(self, args)
|
||||
PyObject* self;
|
||||
PyObject* args;
|
||||
{
|
||||
PyObject* pyprodlengths = NULL;
|
||||
PyObject* pyactions = NULL;
|
||||
PyObject* pygotos = NULL;
|
||||
PyObject* res = NULL;
|
||||
int bufchunksize=50;
|
||||
int stackchunksize=100;
|
||||
CHECK_MALLOC(pyprodlengths)
|
||||
CHECK_MALLOC(pyactions)
|
||||
CHECK_MALLOC(pygotos)
|
||||
if (!PyArg_ParseTuple(args, "O!O!O!|ii", &PyList_Type, &pyprodlengths,
|
||||
&PyList_Type, &pyactions, &PyList_Type, &pygotos,
|
||||
&bufchunksize, &stackchunksize))
|
||||
goto finally;
|
||||
res = (PyObject*) newparserobject(pyprodlengths, pyactions, pygotos, bufchunksize, stackchunksize);
|
||||
finally:
|
||||
Py_XDECREF(pyprodlengths);
|
||||
Py_XDECREF(pyactions);
|
||||
Py_XDECREF(pygotos);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static struct PyMethodDef PyLRengine_methods[] = {
|
||||
{"NewEngine", (PyCFunction)parsernew},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
initPyLRengine()
|
||||
{
|
||||
PyObject *m, *d;
|
||||
m = Py_InitModule("PyLRengine", PyLRengine_methods);
|
||||
d = PyModule_GetDict(m);
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize module PyLRengine");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
44
PyLR/README
Normal file
44
PyLR/README
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
You must have python 1.5b1 or newer to run PyLR, as it works with the
|
||||
builtin package support of that version.
|
||||
|
||||
To build:
|
||||
|
||||
1) decide whether you want the PyLRengine module to be a shared library.
|
||||
If not, comment out the '*shared*' line in Setup.
|
||||
|
||||
2) type make -f Makefile boot; make
|
||||
|
||||
that should build the package.
|
||||
|
||||
|
||||
To install:
|
||||
|
||||
If you want to install PyLR in your python distribution, just copy
|
||||
over the PyLR directory to your site-packages directory. If you want
|
||||
to save a little space, take a look at the __init__ file doc string in
|
||||
the top directory and it shows the necessary files (distribution minus
|
||||
Makefile, sedscript, etc). Also, there is a script (pgen.py) you may want
|
||||
in /usr/local/bin or something more accessible as executable from your shell.
|
||||
|
||||
There is html documentation in the doc/ directory.
|
||||
|
||||
|
||||
|
||||
To test:
|
||||
|
||||
pgen.py PyLR/tstpspec tst
|
||||
diff tst PyLR/Parsers/gram.py
|
||||
|
||||
the only difference should be the date line.
|
||||
|
||||
|
||||
Feedback:
|
||||
|
||||
send comments/suggestions/bugreports/contributions to
|
||||
scott@chronis.icgroup.com
|
||||
|
||||
|
||||
|
||||
thanks,
|
||||
|
||||
scott
|
||||
2
PyLR/Setup
Normal file
2
PyLR/Setup
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
*shared*
|
||||
PyLRengine PyLRenginemodule.c
|
||||
2
PyLR/Setup.in
Normal file
2
PyLR/Setup.in
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
*shared*
|
||||
PyLRengine PyLRenginemodule.c
|
||||
39
PyLR/__init__.py
Normal file
39
PyLR/__init__.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
This package has the following modules and characteristics:
|
||||
|
||||
(-) = not done yet
|
||||
(*) = done
|
||||
(?) = working on it
|
||||
|
||||
PyLR/ the top level module Language Genration Tools
|
||||
__init__.py(*) this file
|
||||
Lexer.py(*) defines the Lexer interface that the parser will use, uses re
|
||||
Lexers/(?) a package to put lexers for different things
|
||||
__init__ imports GrammarLex class
|
||||
GrammarLex.py The module that defines the lexer for grammar specifications
|
||||
Grammar.py(*) The module for dealing with grammars
|
||||
PyLRenginemodule.so(*) The engine behind a LR parser (can do SLR, LR, and LALR)
|
||||
Parser.py (*) A class interface to a parser
|
||||
Parsers/(?) A package for storing Parsers
|
||||
__init__ imports GrammarParser class
|
||||
gram.py(*) the definition of the GrammarParser (import into Parsers/ namespace)
|
||||
pgen.py(*) a script for parser generation
|
||||
parsertemplate.py the doc string of this module is the template for parser generation
|
||||
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import Parser,Lexers,Parsers
|
||||
from Lexer import Lexer,SKIPTOK
|
||||
|
||||
|
||||
__version__ = "$Id$"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
75
PyLR/config.c
Normal file
75
PyLR/config.c
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
/* Generated automatically from /usr/lib/python1.5/config/config.c.in by makesetup. */
|
||||
/* -*- C -*- ***********************************************
|
||||
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
|
||||
The Netherlands.
|
||||
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose and without fee is hereby granted,
|
||||
provided that the above copyright notice appear in all copies and that
|
||||
both that copyright notice and this permission notice appear in
|
||||
supporting documentation, and that the names of Stichting Mathematisch
|
||||
Centrum or CWI or Corporation for National Research Initiatives or
|
||||
CNRI not be used in advertising or publicity pertaining to
|
||||
distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
While CWI is the initial source for this software, a modified version
|
||||
is made available by the Corporation for National Research Initiatives
|
||||
(CNRI) at the Internet address ftp://ftp.python.org.
|
||||
|
||||
STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
|
||||
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
|
||||
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
******************************************************************/
|
||||
|
||||
/* Module configuration */
|
||||
|
||||
/* !!! !!! !!! This file is edited by the makesetup script !!! !!! !!! */
|
||||
|
||||
/* This file contains the table of built-in modules.
|
||||
See init_builtin() in import.c. */
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
|
||||
extern void initregex();
|
||||
extern void initpcre();
|
||||
extern void initposix();
|
||||
extern void initsignal();
|
||||
|
||||
/* -- ADDMODULE MARKER 1 -- */
|
||||
|
||||
extern void PyMarshal_Init();
|
||||
extern void initimp();
|
||||
|
||||
struct _inittab _PyImport_Inittab[] = {
|
||||
|
||||
{"regex", initregex},
|
||||
{"pcre", initpcre},
|
||||
{"posix", initposix},
|
||||
{"signal", initsignal},
|
||||
|
||||
/* -- ADDMODULE MARKER 2 -- */
|
||||
|
||||
/* This module "lives in" with marshal.c */
|
||||
{"marshal", PyMarshal_Init},
|
||||
|
||||
/* This lives it with import.c */
|
||||
{"imp", initimp},
|
||||
|
||||
/* These entries are here for sys.builtin_module_names */
|
||||
{"__main__", NULL},
|
||||
{"__builtin__", NULL},
|
||||
{"sys", NULL},
|
||||
|
||||
/* Sentinel */
|
||||
{0, 0}
|
||||
};
|
||||
142
PyLR/doc/PyLR.html
Normal file
142
PyLR/doc/PyLR.html
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
<html>
|
||||
<body bgcolor="#ffffff">
|
||||
<title> PyLR -- Fast LR parsing in python </title>
|
||||
<!-- Changed by: Scott, 15-Dec-1997 -->
|
||||
<center>
|
||||
<h2>PyLR -- Fast LR parsing in python</h2>
|
||||
<hr>
|
||||
</center>
|
||||
|
||||
<ul>
|
||||
<li> <a href="#whatis"> What is PyLR? </a>
|
||||
<li> <a href="#status"> What is the current state of PyLR? </a>
|
||||
<li> <a href="#where"> Where do I get PyLR? </a>
|
||||
<li> <a href="#directions"> What will be added to PyLR? </a>
|
||||
<li> <a href="#parsing"> Where do I find out about parsing theory? </a>
|
||||
<li> <a href="#contrib"> How can I contribute to PyLR? </a>
|
||||
</ul>
|
||||
<hr>
|
||||
<p><p>
|
||||
<a name="whatis"><h2>What is PyLR?</h2></a>
|
||||
|
||||
PyLR is a package of tools for creating efficient parsers in python,
|
||||
commonly known as a compiler compiler. PyLR is currently under
|
||||
development. A ful release is almost complete, but there are still a few missing
|
||||
features that would make it much nicer.
|
||||
|
||||
<p>
|
||||
PyLR (pronounced 'pillar') was motivated by the frequencly with which parsers are hand
|
||||
coded in python, the performance demands that these parsers are subject to (you just can't beat
|
||||
native machine code for speed...), and academic curiosity (I wanted to really know how LR
|
||||
parsing works).
|
||||
<p><p>
|
||||
|
||||
|
||||
<a name="status"> <h2>What is the current state of PyLR? </h2></a>
|
||||
PyLR currently has class interfaces to a Grammar, a Lexer, an extension module
|
||||
defining a parsing engine builtin type, and a parser generator script. All of these components
|
||||
are based on sound parsing theory, but nevertheless haven't been tested by anyone but it's author.
|
||||
The code as is stands can definitely be of use to anyone hand writing a parser in python, but some
|
||||
of the nicer things in the complete package <em> just haven't been done yet </em>. <p>
|
||||
PyLR is therefore under development, as it will always be. PyLR will be given a release number
|
||||
once it supplies the following tools:
|
||||
<ul>
|
||||
|
||||
|
||||
<LI> write an 'engine' module that implements the LR parsing
|
||||
algorythm in C with callbacks to python functions. (done) </LI>
|
||||
|
||||
|
||||
<LI> write a Lexer class using re (done)</LI>
|
||||
|
||||
|
||||
<LI> write a Grammar class that will take as input a context
|
||||
free grammar and produce the parsing tables necessary to complement
|
||||
the engine. This is to be done with LR(1) grammars (done and then
|
||||
deleted -- extremely inefficient) and LALR(1) Grammars(done,
|
||||
except with epsilon (empty) productions,<EM> much</EM> more efficient). </LI>
|
||||
|
||||
|
||||
<LI> add a user interface -- manually write a lexer and Grammar
|
||||
using the exisiting classes to parse lexer and grammar specifications
|
||||
modelled after lex/flex and yacc/bison. (done for Grammars)
|
||||
</LI>
|
||||
|
||||
<LI> write documentation. (usable, but not done)
|
||||
</LI>
|
||||
|
||||
<LI> (post release) add grammars to various languages to the
|
||||
distribution.
|
||||
</LI>
|
||||
</ul>
|
||||
In addtion, I have the following plan for the project:
|
||||
<UL>
|
||||
<LI> make 'epsilon' (empty) productions work (many of them work now, but not all) </LI>
|
||||
|
||||
<LI> optimize the Lexer. Try to join it into one regular expression and derive
|
||||
function calls from match object data. (done, still the slowest part of parsing)</LI>
|
||||
|
||||
<LI> add error specification routines. </LI>
|
||||
|
||||
<LI> change the parser generation algorithm to use only kernel LALR(1) items
|
||||
in the computation of shift actions and gotos in the goto table. This
|
||||
should significantly enhance the rate of parser generation, which is currently
|
||||
a bit slow, but certainly acceptable for medium-sized grammars (< ~100 productions)
|
||||
(done!) this version
|
||||
</LI>
|
||||
|
||||
|
||||
<LI> write a Parser for sql, as used in <A HREF="http://www.pythonpros.com/arw/kwParsing/">gadfly</A>
|
||||
</LI>
|
||||
|
||||
<LI> add operator precedence as an option to the parser specification (further down the road...)</LI>
|
||||
|
||||
</UL>
|
||||
These things will probably be done over the next month or two (as I only have free time to give
|
||||
to this project...Ahemmm...).
|
||||
<p><p>
|
||||
<a name="where"><h2>Where do I get PyLR? </h2></a>
|
||||
You can get PyLR in one of two places, <a href="ftp://chronis.icgroup.com/pub/">here</a>
|
||||
or <a href="PyLR.tgz"> here</a>. Both versions will be in sync with each other.
|
||||
<p><p>
|
||||
|
||||
<a name="directions"><h2>What will be added to PyLR? </h2></a>
|
||||
In addition to the <a href ="#status">list of things to finish </a> before a full release,
|
||||
is published, PyLR could be used as the basis for an efficient datapath analyzer (optimizer),
|
||||
for a front end to translation from one language to another, for type checking code, etc.<p>
|
||||
As soon as the first release is completed, Tools to aid in all these things could well be added
|
||||
to the package. Also, anyone wanting to contribute parser specifications for
|
||||
languages of general use is most welcome.
|
||||
<p><p>
|
||||
|
||||
<a name="parsing"> <h2>Where do I find out more about parsing? </h2></a>
|
||||
Parsing was for a long time a big challenge for computer scientists. The need for
|
||||
computer parsing originally came about with the first writing of compilers. Since then, the
|
||||
theory behind parsing has been studied in depth and has pretty much stabilized as it no longer
|
||||
really presents a big problem in terms of speed or size in terms of parsing todays computer
|
||||
languages. One standard means of parsing that has been used for years because of its efficiency
|
||||
is LR parsing (more particularly, LALR parsing). A lot of good information is in
|
||||
<a href="http://www.amazon.com/exec/obidos/ISBN=1565920007">
|
||||
Lex and Yacc</a> ,
|
||||
<a href="http://www.amazon.com/exec/obidos/ISBN=0201100886">
|
||||
The Dragon Book </a>, and
|
||||
it seems like the only place to find good info on LALR parsing is in
|
||||
|
||||
<pre>
|
||||
DeRemer, F.; and Pennello, T.Efficient computation of LALR(1) look-ahead sets, ACM Trans.
|
||||
Program. Lang. Syst. 4 (1982), 615-649.
|
||||
</pre>
|
||||
|
||||
Finally, to find out how to use PyLR, see the<A HREF="manual.html">PyLR manual</A>
|
||||
|
||||
<a name="contrib"> <h2>How do I contribute to PyLR? </h2></a>
|
||||
<a href="mailto:scott@chronis.icgroup.com">mail me. </a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
313
PyLR/doc/manual.html
Normal file
313
PyLR/doc/manual.html
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
<html>
|
||||
<title> PyLR maual </title>
|
||||
<!-- Changed by: Scott, 12-Dec-1997 -->
|
||||
<body bgcolor="#ffffff">
|
||||
|
||||
<CENTER>
|
||||
<h2>PyLR Manual</h2>
|
||||
</CENTER>
|
||||
|
||||
This is the PyLR parser generator manual. PyLR is a parser generator package for
|
||||
use with python (version 1.5b1 or better). This manual addresses how to use the
|
||||
package to produce parsers.
|
||||
<p>
|
||||
<UL>
|
||||
<LI> <A HREF="#Audience">Intended Audience</A> </LI>
|
||||
|
||||
<LI> <A HREF="#Basics">The Basics</A> </LI>
|
||||
<UL>
|
||||
<LI> <A HREF="#Lexer">Writing a Lexer</A> </LI>
|
||||
|
||||
<LI> <A HREF="#Grammar">Writing a Grammar</A> </LI>
|
||||
|
||||
<LI> <A HREF="#Parser">Putting it together, producing the parser</A> </LI>
|
||||
</UL>
|
||||
|
||||
<LI> <A HREF="#Struct">PyLR Structure Overview</A> </LI>
|
||||
|
||||
<LI> <A HREF="#API">Programming with the Classes</A> </LI>
|
||||
</UL>
|
||||
|
||||
<HR>
|
||||
<p>
|
||||
<p>
|
||||
<A NAME="Audience"> <center> <h3> Audience </h3></center></A>
|
||||
Parsing can be very complicated stuff, and it helps to understand what exactly is
|
||||
happening when something is parsed when writing a parser. Unfortunately (for the impatient),
|
||||
the topic of Parsing has been the subject of many a dissertation. This document will present
|
||||
two views on the data it presents. One is a technical view which will contain terms <EM>without
|
||||
defining them</EM>. These terms are generally understood by those who have studied parsing theory
|
||||
(such as LALR, shift-reduce, etc), and probably not understood by those that haven't. For this
|
||||
reason, I have attempted to include an intuitive view whenever possible, particularly in the
|
||||
section <A HREF="#Basics">The Basics</A>. There should be enough in that section to let anyone
|
||||
interested who is interested and familiar with python write a parser.
|
||||
<HR><p>
|
||||
<A NAME="Basics"><center><h3>The Basics</h3></center></A> <br>
|
||||
|
||||
This section refers to writing lexers, Grammars, and then producing a parser with
|
||||
these parts. In PyLR, a lexer is part of a parser. This simplifies the interface to
|
||||
actually doing the parsing. There is an 'engine' which takes the output of the lexer and triggers
|
||||
the back end of parsing. So we'll start with writing a lexer.
|
||||
<UL>
|
||||
<LI>
|
||||
<A NAME="Lexer"><h4>Writing a Lexer</h4></A><br>
|
||||
When some text is to be parsed, it first has to go through lexical analysis. This
|
||||
process is done with a lexer. PyLR provides a base Lexer class to help write a lexer.
|
||||
The process isn't hard. A lexer just returns the atomic parts of the text. You define what is
|
||||
atomic by using regular expressions to match the atomic parts. Each atomic definition
|
||||
you give is automatically given a token value (an int). When the lexer scans text, it returns
|
||||
a stream of <TT>(token, value)</TT> pairs where the the token is the token value that was assigned
|
||||
to the match definition and the the value is an arbitrary python value (class, string, int, whatever).
|
||||
The <TT>(token, value)</TT> pair is then passed to the parser for further processing.
|
||||
<p>
|
||||
|
||||
|
||||
Frequently, lexers will return the matched text as the
|
||||
<TT>value</TT> in the <TT>(token, value)</TT> pair. This is the
|
||||
default when you subclass the provided Lexer class. However, there
|
||||
are a lot of different things you may want to happen upon finding a
|
||||
match. For example, sometimes you will want to match something but
|
||||
not use the match or pass it on to the parser.
|
||||
<p>
|
||||
|
||||
There is a function in the base class that
|
||||
provides for all these and more options. It is the <br>
|
||||
<TT>.addmatch(compiled_regex, tokenname="", function=None,
|
||||
flags=MAPTOK|EXECFN)</TT> <br> method. This method requires only a regular
|
||||
expression as its argument, but in practice, token names should be passed along with
|
||||
the re. This practice will make your grammar more readable and easier
|
||||
to write later. <br> The <TT>function</TT> argument, if specified, will make the
|
||||
lexer execute that function with the resulting match object as it's
|
||||
one and only argument. The lexer will then return the return value of
|
||||
the function as the <TT>value</TT> in the <TT>(token, value)</TT> pair
|
||||
it returns. By default, the lexer will just return the token and the associated
|
||||
matched text.
|
||||
<br>
|
||||
The <TT>flags</TT> argument not only defaults to the reasonable MAPTOK|EXECFN, but also adopts to
|
||||
the values of the other arguments you pass. This way, you dont' have to bother with them much. The one
|
||||
time it's common to use the flags is when you want the lexer to match something but not return anything until
|
||||
the next match. It is common to have whitespace treated in this fashion. For this option, you use
|
||||
<TT>.addmatch(re.compile(r"\s+"), "", None, Lexer.SKIPTOK)</TT>. The example below utilizes all these
|
||||
options.
|
||||
<p>
|
||||
Finally, please note the call of the <TT>.seteof()</TT> function at the end of the <TT>__init__</TT>
|
||||
method. This is necessary for all subclassed lexers. The reason it is there is that the token value
|
||||
of EOF is expected to be one greater than any other token value by the parser. <EM>Your lexer will not
|
||||
work with the parser api without this call.</EM>
|
||||
<p>
|
||||
Example
|
||||
<pre>
|
||||
from PyLR import Lexer
|
||||
import re, string
|
||||
|
||||
#
|
||||
# this function will handle matches to an integer. It passes the
|
||||
# integer value to the parser and does the conversion here.
|
||||
#
|
||||
def intfunc(m):
|
||||
return string.atoi(m.group(0))
|
||||
|
||||
|
||||
class mathlex(Lexer.Lexer):
|
||||
|
||||
#
|
||||
# define the atomic parts with regular expressions
|
||||
#
|
||||
|
||||
INT = re.compile(r"([1-9]([0-9]+)?)|0") # matches an integer
|
||||
LPAREN = re.compile(r"\(") # matches '('
|
||||
RPAREN = re.compile(r"\)") # matches ')'
|
||||
|
||||
TIMES = re.compile(r"\*") # matches '*'
|
||||
PLUS = re.compile(r"\+") # matches '+'
|
||||
WS = re.compile(r"\s+") # matches whitespace
|
||||
|
||||
|
||||
def __init__(self):
|
||||
#
|
||||
# initialize with the base class
|
||||
#
|
||||
Lexer.Lexer.__init__(self)
|
||||
#
|
||||
# addmatch examples
|
||||
#
|
||||
self.addmatch(self.INT, idfunc, "INT")
|
||||
for p,t in ( (self.PLUS, "PLUS"), (self.TIMES,"TIMES"),
|
||||
(self.LPAREN, "LPAREN"), (self.RPAREN, "RPAREN"),):
|
||||
self.addmatch(p, None, t)
|
||||
self.addmatch(self.ws, None, "", Lexer.SKIPTOK)
|
||||
self.seteof()
|
||||
|
||||
|
||||
# create the lexer
|
||||
lexer = mathlex()
|
||||
# test it with the interactivetest method
|
||||
lexer.interactivetest()
|
||||
</pre>
|
||||
|
||||
</LI>
|
||||
<hr>
|
||||
<LI> <A NAME="Grammar"><h4>Writing a Grammar</h4></A><br>
|
||||
The grammar you write is somewhat easier than the lexer. You don't have
|
||||
to code anything. There is a program in the PyLR distribution called <TT>pgen.py</TT>
|
||||
that will take your Grammar specification and produce a parser for you. The parser that is
|
||||
produced is of the shift-reduce variety of LR parsers and uses LALR(1) items to help produce
|
||||
the parsing tables. In other words, it uses a parsing algorithm that is quite efficient (implemented
|
||||
in C) and will handle most modern day programming language constructs without a problem. These
|
||||
qualities have made this parsing algorithm a very commonly used one in compiler construction since
|
||||
October 1982.
|
||||
<p>
|
||||
When you write a grammar, you are specifying a <EM>context free grammar</EM> in normal form,
|
||||
with a few addons to help generate the parser in Python. In other words, you specify a series
|
||||
of productions. For example, to specify a very simple math grammar that will work with the
|
||||
above lexer, you may state something like this:
|
||||
|
||||
<pre>
|
||||
expression: expression PLUS term
|
||||
| term;
|
||||
|
||||
term: term TIMES factor
|
||||
| factor;
|
||||
|
||||
factor: LPAREN expression RPAREN
|
||||
| INT;
|
||||
</pre>
|
||||
|
||||
The identifiers in all uppercase are conventionally <EM>terminal symbols</EM>.
|
||||
These will be identified by the lexer and returned to the parser. The identifiers
|
||||
in all lowercase are the <EM>nonterminal symbols</EM>. Each nonterminal must appear
|
||||
on the left somewhere. The corresponding right side may have terminals or non terminals.
|
||||
You may not have empty (epsilon) right hand sides (yet).
|
||||
<p>
|
||||
Whenever the parser recognizes a production, it will call a function. You may specify
|
||||
the name of the method of the parser class to be invoked for a production by adding
|
||||
a parenthesized name to the right of the production. The above grammar rewritten with
|
||||
method name specifications looks like this (This part will become more clear after the next step,
|
||||
stay with it!).
|
||||
|
||||
<pre>
|
||||
expression: expression PLUS term (addfunc)
|
||||
| term;
|
||||
|
||||
term: term TIMES factor (timesfunc)
|
||||
| factor;
|
||||
|
||||
factor: LPAREN expression RPAREN (parenfunc)
|
||||
| INT;
|
||||
</pre>
|
||||
|
||||
</LI>
|
||||
|
||||
<LI> <A NAME="Parser"><h4>Putting it all together: making the parser</h4></A><br>
|
||||
When you create a parser, you are creating a class that is intended to act like
|
||||
a class in library code. That is, it will mostly be used by subclassing that class.
|
||||
The parser you create will parse what it was intended to, but it won't do anything
|
||||
with the parse tree unless you subclass it and define some special methods.
|
||||
<p>
|
||||
Those methods must have the name specified in the grammar you wrote. For example, if you
|
||||
built a parser for the above grammar, in order for it to actually add things together,
|
||||
you would have to subclass the class that was produced and then define the methods
|
||||
<TT>addfunc</TT>, <TT>timesfunc</TT>, and <TT>parenfunc</TT>. When each of these methods is called
|
||||
it will be passed the values on the right hand side of the corresponding production as arguments.
|
||||
Those values are either the value returned by the lexer, if the symbol is terminal, or
|
||||
a value returned by one of these special methods, if the symbol is a nonterminal.
|
||||
<p>
|
||||
In the above example, since the rest of the productions only have one item, it doesn't really matter
|
||||
whether or not they have methods, the parser just calls a reasonable default.
|
||||
<p>
|
||||
As you can see, we've defined most of what is necessary for building a parser. But the above should tell
|
||||
you that there are a few other things that you may want to define, like the name of the class that
|
||||
is produced, or what lexer is used with the parser. Describing these things along with a grammar like
|
||||
the example above is writing a parser specification for PyLR. A reasonable parser specification for the
|
||||
example we've been following:
|
||||
<pre>
|
||||
_class SimpleMathParser
|
||||
_lex mathlex.mathlex()
|
||||
_code from PyLR.Lexers import mathlex
|
||||
"""
|
||||
expression: expression PLUS term (addfunc)
|
||||
| term;
|
||||
|
||||
term: term TIMES factor (timesfunc)
|
||||
| factor;
|
||||
|
||||
factor: LPAREN expression RPAREN (parenfunc)
|
||||
| INT;
|
||||
"""
|
||||
</pre>
|
||||
the <TT>_class </TT> keyword defines the name of the class that the parser will take
|
||||
the <TT>_lex</TT> keyword defines the code used to intialize that parser's lexer
|
||||
the <TT>_code</TT> keyword defines extra code at the top of the output file. Multiple
|
||||
instances of this keyword will cause the extra source code (in python) to be accumulated.
|
||||
the triple quotes delimit the grammar section.
|
||||
<p><em>
|
||||
Please note, the above syntax is subject to change as this is an alpha release and I feel
|
||||
that it can be improved upon.</em>
|
||||
<p>
|
||||
now you can create a parser. Just use the <TT>pgen.py</TT> script and it will output
|
||||
your source code:
|
||||
<pre>
|
||||
pgen.py mathparserspec tst.py
|
||||
chronis 3:34am $ python
|
||||
Python 1.5b1 (#1, Nov 27 1997, 19:51:47) [GCC 2.7.2] on linux2
|
||||
Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
|
||||
>>> import tst
|
||||
>>> dir(tst)
|
||||
['PyLR', 'SimpleMathParser', '__builtins__', '__doc__', '__file__', '__name__', '_actiontable', '_gototable', '_prodinfo', 'mathlex']
|
||||
>>> print tst.SimpleMathParser.__doc__
|
||||
|
||||
this class was produced automatically by the PyLR parser generator.
|
||||
It is meant to be subclassed to produce a parser for the grammar
|
||||
|
||||
expression -> expression PLUS term (addfunc)
|
||||
| term; (unspecified)
|
||||
term -> term TIMES factor (timesfunc)
|
||||
| factor; (unspecified)
|
||||
factor -> LPAREN expression RPAREN (parenfunc)
|
||||
| INT; (unspecified)
|
||||
|
||||
While parsing input, if one of the above productions is recognized,
|
||||
a method of your sub-class (whose name is indicated in parens to the
|
||||
right) will be invoked. Names marked 'unspecified' will not me invoked.
|
||||
|
||||
usage:
|
||||
|
||||
class MySimpleMathParser(SimpleMathParser):
|
||||
# ...define the methods for the productions...
|
||||
|
||||
p = MySimpleMathParser(); p.parse(text)
|
||||
|
||||
>>> class MP(tst.SimpleMathParser):
|
||||
... def __init__(self):
|
||||
... tst.SimpleMathParser.__init__(self)
|
||||
... def addfunc(self, left, plus, right):
|
||||
... print "%d + %d" % (left, right)
|
||||
... return left + right
|
||||
... def parenfunc(self, lp, expr, rp):
|
||||
... print "handling parens"
|
||||
... return expr
|
||||
... def timesfunc(self, left, times, right):
|
||||
... print "%d * %d" % (left, right)
|
||||
... return left * right
|
||||
...
|
||||
>>> mp = mathparser()
|
||||
>>> mp.parse("4 * (3 + 2 * 5)")
|
||||
2 * 5
|
||||
3 + 10
|
||||
handling parens
|
||||
4 * 13
|
||||
|
||||
|
||||
</pre>
|
||||
|
||||
</LI>
|
||||
</UL>
|
||||
|
||||
|
||||
<A NAME="Struct"><center><h3>Structure</h3></center></A>
|
||||
Nothing yet, sorry it's an alpha, read the source.
|
||||
|
||||
<A NAME="API"><center><h3>API</h3></center></A>
|
||||
Nothing yet, sorry it's an alpha. Read the source.
|
||||
|
||||
</html>
|
||||
170
PyLR/gramnew.py
Normal file
170
PyLR/gramnew.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""
|
||||
out -- created Sun Dec 14 21:41:11 1997
|
||||
|
||||
This file was automatically generated by the PyLR parser generator.
|
||||
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
|
||||
tables are used to give functionality to a parsing engine. It also defines
|
||||
A Parser class called GrammarParser which will use this engine. It's Usage is
|
||||
indicated in GrammarParser's doc-string.
|
||||
"""
|
||||
#
|
||||
# this section contains source code added by the user
|
||||
# plus 'import PyLR'
|
||||
#
|
||||
|
||||
import PyLR.Lexers
|
||||
import PyLR.Parser
|
||||
import PyLR
|
||||
|
||||
#
|
||||
# the action table ('s', 4) means shift to state 4,
|
||||
# ('r', 4) means reduce by production number 4
|
||||
# other entries are errors. each row represents a state
|
||||
# and each column a terminal lookahead symbol (plus EOF)
|
||||
# these symbols are ['LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', 'EOF']
|
||||
#
|
||||
_actiontable = [
|
||||
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)],
|
||||
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)],
|
||||
[('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)],
|
||||
[('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)],
|
||||
[('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)],
|
||||
[('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)],
|
||||
[('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)],
|
||||
[('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)]
|
||||
]
|
||||
|
||||
|
||||
|
||||
#
|
||||
# the goto table, each row represents a state
|
||||
# and each column, the nonterminal that was on the lhs of the
|
||||
# reduction
|
||||
#
|
||||
_gototable = [
|
||||
[1, 2, 3, 9, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, 4, None, 8, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, 6, 14, None, None, None, None],
|
||||
[None, None, None, None, None, 13, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, 17, 19, 22, 26],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, 21, 22, 26],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None],
|
||||
[None, None, None, None, None, None, None, None, None, None]
|
||||
]
|
||||
|
||||
|
||||
|
||||
#
|
||||
# This is the prodinfo table. each row represents a production
|
||||
# the entries are the length of the production, the name of a method
|
||||
# in an instance of the GrammarParser class below that gets called
|
||||
# when that production occurs, and the index of the lhs in the
|
||||
# nonterminals (as in # the gototable)
|
||||
#
|
||||
_prodinfo = [
|
||||
(1, 'unspecified', 0), # pspec -> ['gspec']
|
||||
(2, 'unspecified', 0), # pspec -> ['pydefs', 'gspec']
|
||||
(3, 'unspecified', 1), # gspec -> ['GDEL', 'lhsdeflist', 'GDEL']
|
||||
(2, 'unspecified', 2), # pydefs -> ['pydefs', 'pydef']
|
||||
(1, 'unspecified', 2), # pydefs -> ['pydef']
|
||||
(1, 'lexdef', 3), # pydef -> ['LEX']
|
||||
(1, 'addcode', 3), # pydef -> ['CODE']
|
||||
(1, 'classname', 3), # pydef -> ['CLASS']
|
||||
(2, 'unspecified', 4), # lhsdeflist -> ['lhsdeflist', 'lhsdef']
|
||||
(1, 'unspecified', 4), # lhsdeflist -> ['lhsdef']
|
||||
(4, 'lhsdef', 5), # lhsdef -> ['ID', 'COLON', 'rhslist', 'SCOLON']
|
||||
(1, 'singletolist', 6), # rhslist -> ['rhs']
|
||||
(3, 'rhslist_OR_rhs', 6), # rhslist -> ['rhslist', 'OR', 'rhs']
|
||||
(1, 'rhs_idlist', 7), # rhs -> ['rhsidlist']
|
||||
(4, 'rhs_idlist_func', 7), # rhs -> ['rhsidlist', 'LPAREN', 'ID', 'RPAREN']
|
||||
(1, 'unspecified', 8), # rhsidlist -> ['idlist']
|
||||
(0, 'rhseps', 8), # rhsidlist -> []
|
||||
(2, 'idl_idlistID', 9), # idlist -> ['idlist', 'ID']
|
||||
(1, 'idlistID', 9), # idlist -> ['ID']
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
class GrammarParser(PyLR.Parser.Parser):
    """LALR parser for PyLR grammar-specification files.

    This class was produced automatically by the PyLR parser generator.
    Subclass it and implement the handler methods named in parentheses
    below; productions marked 'unspecified' need no handler:

        pspec      -> gspec | pydefs gspec               (unspecified)
        gspec      -> GDEL lhsdeflist GDEL               (unspecified)
        pydefs     -> pydefs pydef | pydef               (unspecified)
        pydef      -> LEX (lexdef) | CODE (addcode) | CLASS (classname)
        lhsdeflist -> lhsdeflist lhsdef | lhsdef         (unspecified)
        lhsdef     -> ID COLON rhslist SCOLON            (lhsdef)
        rhslist    -> rhs (singletolist)
                    | rhslist OR rhs (rhslist_OR_rhs)
        rhs        -> rhsidlist (rhs_idlist)
                    | rhsidlist LPAREN ID RPAREN (rhs_idlist_func)
        rhsidlist  -> idlist (unspecified) | <empty> (rhseps)
        idlist     -> idlist ID (idl_idlistID) | ID (idlistID)

    usage:

        class MyGrammarParser(GrammarParser):
            # ...define the methods for the productions...

        p = MyGrammarParser(); p.parse(text)
    """

    def __init__(self):
        # Wire the grammar lexer and the generated LALR tables into the
        # generic PyLR parsing engine.
        PyLR.Parser.Parser.__init__(
            self, PyLR.Lexers.GrammarLex(),
            _actiontable, _gototable, _prodinfo)
|
||||
75
PyLR/parsertemplate.py
Normal file
75
PyLR/parsertemplate.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
#
# This file's doc string is used as a template for producing PyLRtables.py.
# PyLRtables.py contains the source code to produce the engine part of a
# parser.  The %(...)s placeholders are filled in via Python %-string
# formatting by the generator.
#
# NOTE(review): leading whitespace of the template body was lost in the
# copy this was recovered from; the indentation of the generated class
# below is reconstructed -- verify against a freshly generated
# PyLRtables.py.
#
'''\
"""
%(filename)s -- created %(date)s

This file was automatically generated by the PyLR parser generator.
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
tables are used to give functionality to a parsing engine. It also defines
A Parser class called %(parsername)s which will use this engine. It's usage
is indicated in %(parsername)s's doc-string.
"""
#
# this section contains source code added by the user
# plus 'import PyLR'
#
%(extrasource)s
import PyLR

#
# the action table
# 's' means shift
# ('r',<n>) means reduce with production n
# 'a' means accept
# '' means error
# each row represents a state and each column a terminal lookahead symbol
# (excluding symbols with Lexer.SKIPTOK of course).
# Lexer symbols are:
# %(symbols)s
#
_actiontable = %(actiontable)s

#
# the goto table, each row represents a state
# and each column, the nonterminal that was on the lhs of the
# reduction
#
_gototable = %(gototable)s

#
# This is the prodinfo table. each row represents a production
# the entries are the length of the production, the name of a method
# in an instance of the %(parsername)s class below that gets called
# when that production occurs, and the index of the lhs in the
# nonterminals (as in # the gototable)
#
_prodinfo = %(prodinfo)s


class %(parsername)s(PyLR.Parser.Parser):
    """
    this class was produced automatically by the PyLR parser generator.
    It is meant to be subclassed to produce a parser for the grammar

%(grammar)s

    While parsing input, if one of the above productions is recognized,
    a method of your sub-class (whose name is indicated in parens to the
    right) will be invoked. Names marked 'unspecified' should be ignored.

    usage:

    class My%(parsername)s(%(parsername)s):
        # ...define the methods for the productions...

    p = My%(parsername)s(); p.parse(text)
    """
    def __init__(self):
        lexer = %(lexerinit)s
        PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo)
'''
|
||||
|
||||
118
PyLR/pgen.py
Normal file
118
PyLR/pgen.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import PyLR, PyLR.Grammar, sys, getopt
|
||||
from PyLR.Parsers import GrammarParser
|
||||
|
||||
class ParserParser(GrammarParser):
|
||||
def __init__(self):
|
||||
GrammarParser.__init__(self)
|
||||
self.result = [] # to be populated with productions
|
||||
self.funcmap = {}
|
||||
self.usercode = ""
|
||||
self.lexdef = ""
|
||||
self.classname = "MyParser"
|
||||
self.idlist = []
|
||||
|
||||
def idlistID(self, id):
|
||||
"idlist -> id"
|
||||
self.idlist.append(id)
|
||||
return [id]
|
||||
|
||||
def singletolist(self, el):
|
||||
"rhslist -> rhs"
|
||||
return [el]
|
||||
|
||||
def idl_idlistID(self, l, el):
|
||||
"idlist -> idlist id"
|
||||
self.idlist.append(id)
|
||||
l.append(el)
|
||||
return l
|
||||
|
||||
def rhs_idlist(self, l):
|
||||
"rhs -> idlist"
|
||||
return l
|
||||
|
||||
def rhseps(self):
|
||||
"rhseps -> "
|
||||
return []
|
||||
|
||||
def rhs_idlist_func(self, l, lp, id, rp):
|
||||
"rhs -> idlist LPAREN ID RPAREN"
|
||||
self.funcmap[tuple(l)] = id
|
||||
return l
|
||||
|
||||
def rhslist_OR_rhs(self, l, OR, el):
|
||||
"rhs -> rhslist OR rhs"
|
||||
l.append(el)
|
||||
return l
|
||||
|
||||
def lhsdef(self, lhs, COLON, rhslist, SCOLON):
|
||||
"lhsdef -> ID COLON rhslist SCOLON"
|
||||
print lhs
|
||||
for rhs in rhslist:
|
||||
self.result.append(PyLR.Grammar.Production(lhs, rhs))
|
||||
return None
|
||||
|
||||
def lexdef(self, ld):
|
||||
self.lexdef = ld
|
||||
|
||||
def addcode(self, code):
|
||||
self.usercode = self.usercode + "\n" + code
|
||||
|
||||
def classname(self, name):
|
||||
self.classname = name
|
||||
|
||||
def parse(self, text, outf, verbose=0):
|
||||
global g, toks, lexer
|
||||
PyLR.Parser.Parser.parse(self, text, verbose)
|
||||
# insert the functionnames
|
||||
for p in self.result:
|
||||
funcname = self.funcmap.get(tuple(p.RHS), "unspecified")
|
||||
p.setfuncname(funcname)
|
||||
#evaluate the lexer
|
||||
exec(self.usercode)
|
||||
lexer = eval(self.lexdef)
|
||||
|
||||
# generate the tokens for grammar
|
||||
toks = lexer.getTokenList()
|
||||
# change the symbols to their numbers
|
||||
for p in self.result:
|
||||
for si in range(len(p.RHS)):
|
||||
if p.RHS[si] in toks:
|
||||
p.RHS[si] = toks.indexof(p.RHS[si])
|
||||
|
||||
g = PyLR.Grammar.LALRGrammar(self.result, toks)
|
||||
print g
|
||||
g.extrasource = self.usercode
|
||||
print "done parsing, about to start parser generation (writing to %s)" % outf
|
||||
if self.lexdef:
|
||||
g.writefile(outf, self.classname, self.lexdef)
|
||||
else:
|
||||
g.writefile(outf, self.classname)
|
||||
print "done"
|
||||
|
||||
|
||||
def main():
|
||||
usage = "pgen.py infile outfile"
|
||||
args = sys.argv[1:]
|
||||
if len(args) != 2:
|
||||
print usage
|
||||
sys.exit(0)
|
||||
inf = args[0]
|
||||
outf = args[1]
|
||||
if inf == "-":
|
||||
f = sys.stdin
|
||||
else:
|
||||
f = open(inf)
|
||||
pspec = f.read()
|
||||
# f.close() # dont close stdin
|
||||
global pp # for use with python -i pgen.py <inf> <outf>
|
||||
pp = ParserParser()
|
||||
verbose=1
|
||||
pp.parse(pspec, outf, verbose)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
28
PyLR/sedscript
Normal file
28
PyLR/sedscript
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
1i\
|
||||
# Generated automatically from Makefile.pre.in by sedscript.
|
||||
s%@VERSION[@]%1.5%
|
||||
s%#@SET_CCC[@]%CCC=g++%
|
||||
s%@CC[@]%gcc%
|
||||
s%@RANLIB[@]%ranlib%
|
||||
s%@OPT[@]%-g -O2%
|
||||
s%@LDFLAGS[@]%%
|
||||
s%@DEFS[@]%-DHAVE_CONFIG_H%
|
||||
s%@LIBS[@]%-lieee -ldl -lpthread%
|
||||
s%@LIBM[@]%-lm%
|
||||
s%@LIBC[@]%%
|
||||
s%@MACHDEP[@]%linux2%
|
||||
s%^prefix=.*%prefix= /usr%
|
||||
s%^exec_prefix=.*%exec_prefix= ${prefix}%
|
||||
s%@SO[@]%.so%
|
||||
s%@LDSHARED[@]%gcc -shared -lc%
|
||||
s%@CCSHARED[@]%-fPIC%
|
||||
s%@LINKFORSHARED[@]%-Xlinker -export-dynamic%
|
||||
s%@LINKCC[@]%$(CC)%
|
||||
/^#@SET_CCC@/d
|
||||
/^installdir=/s%=.*%= /usr%
|
||||
/^exec_installdir=/s%=.*%=/usr%
|
||||
/^srcdir=/s%=.*%= .%
|
||||
/^VPATH=/s%=.*%= .%
|
||||
/^LINKPATH=/s%=.*%= %
|
||||
/^BASELIB=/s%=.*%= %
|
||||
/^BASESETUP=/s%=.*%= %
|
||||
51
PyLR/tstpspec
Normal file
51
PyLR/tstpspec
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#
|
||||
# this is a Grammar Spec for parsing PyLR style
|
||||
# Grammars
|
||||
#
|
||||
|
||||
#
|
||||
# this is the pydefs section, where you name the output class
|
||||
# , add code, state how to initialize the lexer
|
||||
#
|
||||
_class GrammarParser
|
||||
_code import PyLR.Lexers
|
||||
_code import PyLR.Parser
|
||||
_lex PyLR.Lexers.GrammarLex()
|
||||
|
||||
#
|
||||
# this is the Grammar spec part, where you specify
|
||||
# the productions and optionally their corresponding
|
||||
# method names in the generated Parser class (or subclasses
|
||||
# of it)
|
||||
#
|
||||
"""
|
||||
pspec: gspec |
|
||||
pydefs gspec;
|
||||
|
||||
gspec: GDEL lhsdeflist GDEL;
|
||||
|
||||
pydefs: pydefs pydef |
|
||||
pydef;
|
||||
|
||||
pydef: LEX (lexdef) |
|
||||
CODE (addcode) |
|
||||
CLASS (classname);
|
||||
|
||||
lhsdeflist: lhsdeflist lhsdef |
|
||||
lhsdef;
|
||||
|
||||
lhsdef: ID COLON rhslist SCOLON (lhsdef);
|
||||
|
||||
rhslist: rhs (singletolist) |
|
||||
rhslist OR rhs (rhslist_OR_rhs);
|
||||
|
||||
rhs: rhsidlist (rhs_idlist) |
|
||||
rhsidlist LPAREN ID RPAREN (rhs_idlist_func);
|
||||
|
||||
rhsidlist: idlist
|
||||
| (rhseps);
|
||||
|
||||
|
||||
idlist: idlist ID (idl_idlistID) |
|
||||
ID (idlistID);
|
||||
"""
|
||||
25
README
Normal file
25
README
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
LinkChecker
|
||||
=============
|
||||
|
||||
With LinkChecker you can check your HTML documents for broken links.
|
||||
Features:
|
||||
o recursive checking
|
||||
o multithreaded
|
||||
o output can be colored or normal text, HTML, SQL or a GML sitemap graph
|
||||
o HTTP, FTP, mailto:, Gopher, Telnet and local file links are supported
|
||||
Javascript and HTTPS links are currently ignored
|
||||
o restrict link checking to your local domain
|
||||
o HTTP proxy support
|
||||
o give username/password for HTTP and FTP authorization
|
||||
o robots.txt exclusion protocol support
|
||||
|
||||
LinkChecker is licensed under the GNU General Public License.
|
||||
Credits go to Guido van Rossum for making Python. His hovercraft is
|
||||
full of eels!
|
||||
As this program is directly derived from my Java link checker, additional
|
||||
credits go to Robert Forsman (the author of JCheckLinks) and his
|
||||
robots.txt parse algorithm.
|
||||
|
||||
I want to thank everybody who gave me feedback, bug reports and
|
||||
suggestions.
|
||||
|
||||
84
README.dns
Normal file
84
README.dns
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
Release 2.2, Mon Apr 27 22:59:16 EST 1998
|
||||
|
||||
This is a test release of the DNS code, as originally written by
|
||||
Guido van Rossum, and with a hopefully nicer API bolted over the
|
||||
top of it by Anthony Baxter <arb@connect.com.au>. It's also in a
|
||||
python 1.5 package.
|
||||
|
||||
There are several known bugs/unfinished bits
|
||||
|
||||
- processing of AXFR results is not done yet.
|
||||
- something I've done recently has broken the DnsAsyncRequest(). Bummer.
|
||||
- doesn't do IPv6 DNS requests (type AAAA) (as per [RFC 1886])
|
||||
- docs, aside from this file
|
||||
- all sorts of other stuff that I've probably forgotten.
|
||||
|
||||
Stuff it _does_ do:
|
||||
processes /etc/resolv.conf - at least as far as nameserver directives go.
|
||||
tries multiple nameservers.
|
||||
nicer API - see below.
|
||||
returns results in more useful format.
|
||||
optional timing of requests.
|
||||
default 'show' behaviour emulates 'dig' pretty closely.
|
||||
support for asyncore.py ### NOTE: currently broken a bit.
|
||||
|
||||
|
||||
To use:
|
||||
|
||||
import DNS
|
||||
reqobj=DNS.Request(args)
|
||||
reqobj.req(args)
|
||||
|
||||
args can be a name, in which case it takes that as the query, and/or a series
|
||||
of keyword/value args. (see below for a list of args)
|
||||
|
||||
when calling the 'req()' method, it reuses the options specified in the
|
||||
DNS.Request() call as defaults.
|
||||
|
||||
options are applied in the following order:
|
||||
those specified in the req() call
|
||||
or, if not specified there,
|
||||
those specified in the creation of the Request() object
|
||||
or, if not specified there,
|
||||
those specified in the DNS.defaults dictionary
|
||||
|
||||
name servers can be specified in the following ways:
|
||||
by calling DNS.ParseResolvConf(), which will load the DNS.servers
|
||||
from the system's /etc/resolv.conf file
|
||||
by specifying it as an option to the request
|
||||
by manually setting DNS.defaults['server'] to a list of server IP
|
||||
addresses to try
|
||||
XXXX It should be possible to load the DNS servers on a windows or
|
||||
mac box, from where-ever they've squirrelled them away
|
||||
|
||||
name="host.do.main" # the object being looked up
|
||||
qtype="SOA" # the query type, eg SOA, A, MX, CNAME, ANY
|
||||
protocol="udp" # "udp" or "tcp" - usually you want "udp"
|
||||
server="nameserver" # the name of the nameserver. Note that you might
|
||||
# want to use an IP address here
|
||||
rd=1 # "recursion desired" - defaults to 1.
|
||||
other: opcode, port, ...
|
||||
|
||||
There's also some convenience functions, for the lazy:
|
||||
|
||||
to do a reverse lookup:
|
||||
>>> print DNS.revlookup("192.189.54.17")
|
||||
yarrina.connect.com.au
|
||||
|
||||
to look up all MX records for an entry:
|
||||
>>> print DNS.mxlookup("connect.com.au")
|
||||
[(10, 'yarrina.connect.com.au'), (100, 'warrane.connect.com.au')]
|
||||
|
||||
Documentation of the rest of the interface will have to wait for a
|
||||
later date. Note that the DnsAsyncRequest stuff is currently not
|
||||
working - I haven't looked too closely at why, yet.
|
||||
|
||||
There's some examples in the tests/ directory - including test5.py,
|
||||
which is even vaguely useful. It looks for the SOA for a domain, checks
|
||||
that the primary NS is authoritative, then checks the nameservers
|
||||
that it believes are NSs for the domain and checks that they're
|
||||
authoritative, and that the zone serial numbers match.
|
||||
|
||||
see also README.guido for the original docs.
|
||||
|
||||
comments to me - arb@connect.com.au
|
||||
136
StringUtil.py
Normal file
136
StringUtil.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import string,re
|
||||
|
||||
# Character -> HTML entity replacements used by htmlify().
# NOTE(review): in the recovered copy both columns of each pair showed
# the same literal character (e.g. ("ä", "ä")), i.e. the entity text was
# destroyed when the file was converted; the standard HTML 4 entities are
# reconstructed here.  "&" must be replaced FIRST, otherwise the "&" of
# previously inserted entities would be escaped again.
HtmlTable = [
    ("&", "&amp;"),
    ("<", "&lt;"),
    (">", "&gt;"),
    ("ä", "&auml;"),
    ("ö", "&ouml;"),
    ("ü", "&uuml;"),
    ("Ä", "&Auml;"),
    ("Ö", "&Ouml;"),
    ("Ü", "&Uuml;"),
    ("ß", "&szlig;"),
    ("é", "&eacute;"),
    ("è", "&egrave;"),
]

# SQL string escaping used by sqlify(): double embedded single quotes.
SQLTable = [
    ("'", "''"),
]
|
||||
|
||||
def stripHtmlComments(data):
    """Return `data` with all complete <!-- ... --> comments removed.

    An unterminated comment (no closing -->) is left in place.
    Modernized: the deprecated string.find() (removed in Python 3) is
    replaced by the equivalent str method.
    """
    i = data.find("<!--")
    while i != -1:
        j = data.find("-->", i)
        if j == -1:
            break  # unterminated comment: keep the remainder unchanged
        data = data[:i] + data[j+3:]
        i = data.find("<!--")
    return data
|
||||
|
||||
|
||||
def stripFenceComments(data):
    """Return `data` without '#'-comment lines; None if every line is one.

    A line is a comment when it starts with optional whitespace followed
    by '#'.  Kept lines are rejoined with newlines.  Modernized: the
    deprecated string.split() replaced by str.split(), the regex hoisted
    out of the loop and made a raw string.
    """
    comment = re.compile(r"\s*#.*")
    ret = None
    for line in data.split("\n"):
        if not comment.match(line):
            # NOTE(review): `if ret:` means an empty first kept line is
            # overwritten by the next one -- original quirk, kept as-is.
            if ret:
                ret = ret + "\n" + line
            else:
                ret = line
    return ret
|
||||
|
||||
|
||||
def rstripQuotes(s):
    "Strip a single optional trailing quote (single or double) from s."
    if s and s[-1] in "\"'":
        return s[:-1]
    return s
|
||||
|
||||
|
||||
def lstripQuotes(s):
    "Strip a single optional leading quote (single or double) from s."
    if s and s[0] in "\"'":
        return s[1:]
    return s
|
||||
|
||||
|
||||
def stripQuotes(s):
    "Strip one optional leading and one optional trailing quote from s."
    # Strings shorter than 2 chars are returned untouched (a lone quote
    # stays a lone quote).
    if len(s) < 2:
        return s
    if s[0] in "\"'":
        s = s[1:]
    if s and s[-1] in "\"'":
        s = s[:-1]
    return s
|
||||
|
||||
|
||||
def indent(s, level):
    "Indent the continuation lines of s by `level` spaces."
    return indentWith(s, level * " ")


def indentWith(s, indent):
    """Insert `indent` after every newline of s except a trailing one.

    The first line is deliberately left unindented.
    """
    i = 0
    while i < len(s):
        # only indent when the newline is not the very last character
        if s[i] == "\n" and i + 1 < len(s):
            s = s[:i+1] + indent + s[i+1:]
        i = i + 1
    return s
|
||||
|
||||
def blocktext(s, width):
    """Re-wrap `s` so that no output line is wider than `width` characters.

    Lines are broken at the last whitespace before `width` when one
    exists (see getLastWordBoundary), otherwise hard-wrapped at width-1.
    BUG FIX: the original used lines.pop(), consuming the input lines in
    REVERSE order; pop(0) keeps them in document order.
    NOTE(review): consecutive input lines are concatenated with no
    separator, so words spanning an input line break are merged --
    original behaviour, kept as-is.
    Modernized: string.split()/string.strip() replaced by str methods.
    """
    lines = s.split("\n")
    line = ""
    ret = ""
    while lines:
        line = line + lines.pop(0)
        while len(line) > width:
            i = getLastWordBoundary(line, width)
            ret = ret + line[0:i].strip() + "\n"
            line = line[i:].strip()
    return ret + line


def getLastWordBoundary(s, width):
    """Get maximal index i of a whitespace char in s with 0 < i < width.

    Note: if s contains no whitespace this returns width-1."""
    match = re.compile(r".*\s").match(s[0:width])
    if match:
        return match.end()
    return width - 1
|
||||
|
||||
|
||||
def applyTable(table, str):
    """Apply each (old, new) replacement pair of `table` to `str`, in order.

    Modernized: the deprecated string.replace() (removed in Python 3) is
    replaced by the equivalent str method; pairs are tuple-unpacked.
    """
    for old, new in table:
        str = str.replace(old, new)
    return str


def texify(str):
    "Escape `str` for TeX output."
    # NOTE(review): TexTable is not defined anywhere in this module --
    # calling texify() raises NameError unless TexTable is supplied
    # elsewhere.  TODO confirm where TexTable is meant to come from.
    return applyTable(TexTable, str)


def sqlify(str):
    "Quote `str` as an SQL string literal; None/empty becomes NULL."
    if not str:
        return "NULL"
    return "'" + applyTable(SQLTable, str) + "'"


def htmlify(str):
    "Escape `str` for embedding in HTML using HtmlTable."
    return applyTable(HtmlTable, str)
|
||||
|
||||
def getLineNumber(str, index):
    "Return the 1-based line number of character offset `index` in `str`."
    # Negative offsets are clamped to the start of the string.
    index = max(index, 0)
    line = 1
    # NOTE: indexing (rather than slicing) preserves the original
    # IndexError when `index` lies beyond the end of `str`.
    for i in range(index):
        if str[i] == '\n':
            line = line + 1
    return line
|
||||
|
||||
10
TODO
Normal file
10
TODO
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
Use leading '_' for private functions.
|
||||
|
||||
Is there a way to cleanly stop arbitrary Thread objects
|
||||
(with exit handler)? Mail me solutions!
|
||||
|
||||
Patch the PyLR parser module to suit my needs.
|
||||
|
||||
Write a graph layout algorithm.
|
||||
|
||||
Write a little tool to produce an image of the GML output.
|
||||
18
create.sql
Normal file
18
create.sql
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
-- tested with postgresql
|
||||
|
||||
drop table linkdb;
|
||||
|
||||
create table linkdb (
|
||||
urlname varchar(50) not null,
|
||||
recursionlevel int not null,
|
||||
parentname varchar(50),
|
||||
baseref varchar(50),
|
||||
errorstring varchar(50),
|
||||
validstring varchar(50),
|
||||
warningstring varchar(50),
|
||||
infoString varchar(150),
|
||||
valid int,
|
||||
url varchar(50),
|
||||
line int,
|
||||
cached int
|
||||
);
|
||||
1
debian/.cvsignore
vendored
Normal file
1
debian/.cvsignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
tmp
|
||||
107
debian/changelog
vendored
Normal file
107
debian/changelog
vendored
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
pylice (0.9.0) unstable; urgency=low
|
||||
|
||||
* See ChangeLog
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 21 Feb 2000 16:26:22 +0100
|
||||
|
||||
pylice (0.8.0) unstable; urgency=low
|
||||
|
||||
* See ChangeLog
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Thu, 10 Feb 2000 21:32:55 +0000
|
||||
|
||||
pylice (0.7.0) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Thu, 27 Jan 2000 23:15:24 +0100
|
||||
|
||||
pylice (0.6.2) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Wed, 26 Jan 2000 11:41:28 +0100
|
||||
|
||||
pylice (0.6.1) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 25 Jan 2000 21:11:15 +0100
|
||||
|
||||
pylice (0.6.0) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Wed, 19 Jan 2000 00:25:55 +0100
|
||||
|
||||
pylice (0.5.0) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 18 Jan 2000 00:39:31 +0100
|
||||
|
||||
pylice (0.4.4) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 17 Jan 2000 12:21:10 +0100
|
||||
|
||||
pylice (0.4.3) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Fri, 14 Jan 2000 02:10:20 +0100
|
||||
|
||||
pylice (0.4.2) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Thu, 13 Jan 2000 21:48:23 +0100
|
||||
|
||||
pylice (0.4.1) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Wed, 12 Jan 2000 13:34:42 +0100
|
||||
|
||||
pylice (0.4.0) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 11 Jan 2000 13:48:53 +0100
|
||||
|
||||
pylice (0.3.0) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Tue, 11 Jan 2000 00:01:37 +0100
|
||||
|
||||
pylice (0.2.1) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 10 Jan 2000 22:01:54 +0100
|
||||
|
||||
pylice (0.2.0) unstable; urgency=low
|
||||
|
||||
* See CHANGES
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Mon, 10 Jan 2000 21:28:38 +0100
|
||||
|
||||
pylice (0.1.0) unstable; urgency=low
|
||||
|
||||
* New release. Missing features: robots.txt exclusion standard,
|
||||
proxy setting, user/password setting and
|
||||
mailto:, telnet:, gopher: link checking
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Sun, 9 Jan 2000 14:32:15 +0100
|
||||
|
||||
pylice (0.0.1) unstable; urgency=low
|
||||
|
||||
* Initial Release.
|
||||
|
||||
-- Bastian Kleineidam <calvin@cs.uni-sb.de> Sat, 8 Jan 2000 11:00:35 +0100
|
||||
|
||||
Local variables:
|
||||
mode: debian-changelog
|
||||
End:
|
||||
21
debian/control
vendored
Normal file
21
debian/control
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
Source: linkchecker
|
||||
Section: web
|
||||
Priority: optional
|
||||
Maintainer: Bastian Kleineidam <calvin@cs.uni-sb.de>
|
||||
Standards-Version: 3.0.1
|
||||
|
||||
Package: linkchecker
|
||||
Architecture: any
|
||||
Depends: python-base
|
||||
Description: LinkChecker is a link checker written in Python
|
||||
With LinkChecker you can check your HTML documents for broken links.
|
||||
Features:
|
||||
o recursive checking
|
||||
o multithreaded
|
||||
o output can be colored or normal text, HTML, SQL or a GML sitemap graph
|
||||
o HTTP, FTP, mailto:, Gopher, Telnet and local file links are supported
|
||||
Javascript and HTTPS links are currently ignored
|
||||
o restrict link checking to your local domain
|
||||
o HTTP proxy support
|
||||
o give username/password for HTTP and FTP authorization
|
||||
o robots.txt exclusion protocol support
|
||||
22
debian/copyright
vendored
Normal file
22
debian/copyright
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
This is linkchecker, written and maintained by Bastian Kleineidam <calvin@cs.uni-sb.de>
|
||||
on Sat, 8 Jan 2000 11:00:35 +0100.
|
||||
|
||||
The original source can always be found at:
|
||||
http://linkchecker.sourceforge.net
|
||||
|
||||
Copyright (C) 2000 Bastian Kleineidam
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License with
|
||||
the Debian GNU/Linux distribution in file /usr/doc/copyright/GPL;
|
||||
if not, write to the Free Software Foundation, Inc., 59 Temple Place,
|
||||
Suite 330, Boston, MA 02111-1307 USA
|
||||
0
debian/dirs
vendored
Normal file
0
debian/dirs
vendored
Normal file
4
debian/docs
vendored
Normal file
4
debian/docs
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
INSTALL
|
||||
README
|
||||
README.dns
|
||||
ChangeLog
|
||||
22
debian/ex.doc-base.package
vendored
Normal file
22
debian/ex.doc-base.package
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
Document: pylice
|
||||
Title: Debian pylice Manual
|
||||
Author: <insert document author here>
|
||||
Abstract: This manual describes what pylice is
|
||||
and how it can be used to
|
||||
manage online manuals on Debian systems.
|
||||
Section: unknown
|
||||
|
||||
Format: debiandoc-sgml
|
||||
Files: /usr/doc/pylice/pylice.sgml.gz
|
||||
|
||||
Format: postscript
|
||||
Files: /usr/doc/pylice/pylice.ps.gz
|
||||
|
||||
Format: text
|
||||
Files: /usr/doc/pylice/pylice.text.gz
|
||||
|
||||
Format: HTML
|
||||
Index: /usr/doc/pylice/html/index.html
|
||||
Files: /usr/doc/pylice/html/*.html
|
||||
|
||||
|
||||
1
debian/files
vendored
Normal file
1
debian/files
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
pylice_0.8.0_i386.deb web optional
|
||||
70
debian/init.d.ex
vendored
Normal file
70
debian/init.d.ex
vendored
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#! /bin/sh
|
||||
#
|
||||
# skeleton example file to build /etc/init.d/ scripts.
|
||||
# This file should be used to construct scripts for /etc/init.d.
|
||||
#
|
||||
# Written by Miquel van Smoorenburg <miquels@cistron.nl>.
|
||||
# Modified for Debian GNU/Linux
|
||||
# by Ian Murdock <imurdock@gnu.ai.mit.edu>.
|
||||
#
|
||||
# Version: @(#)skeleton 1.8 03-Mar-1998 miquels@cistron.nl
|
||||
#
|
||||
# This file was automatically customized by dh-make on Sat, 8 Jan 2000 11:00:35 +0100
|
||||
|
||||
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
|
||||
DAEMON=/usr/sbin/pylice
|
||||
NAME=pylice
|
||||
DESC=pylice
|
||||
|
||||
test -f $DAEMON || exit 0
|
||||
|
||||
set -e
|
||||
|
||||
case "$1" in
|
||||
start)
|
||||
echo -n "Starting $DESC: "
|
||||
start-stop-daemon --start --quiet --pidfile /var/run/$NAME.pid \
|
||||
--exec $DAEMON
|
||||
echo "$NAME."
|
||||
;;
|
||||
stop)
|
||||
echo -n "Stopping $DESC: "
|
||||
start-stop-daemon --stop --quiet --pidfile /var/run/$NAME.pid \
|
||||
--exec $DAEMON
|
||||
echo "$NAME."
|
||||
;;
|
||||
#reload)
|
||||
#
|
||||
# If the daemon can reload its config files on the fly
|
||||
# for example by sending it SIGHUP, do it here.
|
||||
#
|
||||
# If the daemon responds to changes in its config file
|
||||
# directly anyway, make this a do-nothing entry.
|
||||
#
|
||||
# echo "Reloading $DESC configuration files."
|
||||
# start-stop-daemon --stop --signal 1 --quiet --pidfile \
|
||||
# /var/run/$NAME.pid --exec $DAEMON
|
||||
#;;
|
||||
restart|force-reload)
|
||||
#
|
||||
# If the "reload" option is implemented, move the "force-reload"
|
||||
# option to the "reload" entry above. If not, "force-reload" is
|
||||
# just the same as "restart".
|
||||
#
|
||||
echo -n "Restarting $DESC: "
|
||||
start-stop-daemon --stop --quiet --pidfile \
|
||||
/var/run/$NAME.pid --exec $DAEMON
|
||||
sleep 1
|
||||
start-stop-daemon --start --quiet --pidfile \
|
||||
/var/run/$NAME.pid --exec $DAEMON
|
||||
echo "$NAME."
|
||||
;;
|
||||
*)
|
||||
N=/etc/init.d/$NAME
|
||||
# echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
|
||||
echo "Usage: $N {start|stop|restart|force-reload}" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
60
debian/manpage.1.ex
vendored
Normal file
60
debian/manpage.1.ex
vendored
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.\" First parameter, NAME, should be all caps
|
||||
.\" Second parameter, SECTION, should be 1-8, maybe w/ subsection
|
||||
.\" other parameters are allowed: see man(7), man(1)
|
||||
.TH PYLICE SECTION "January 8, 2000"
|
||||
.\" Please adjust this date whenever revising the manpage.
|
||||
.\"
|
||||
.\" Some roff macros, for reference:
|
||||
.\" .nh disable hyphenation
|
||||
.\" .hy enable hyphenation
|
||||
.\" .ad l left justify
|
||||
.\" .ad b justify to both left and right margins
|
||||
.\" .nf disable filling
|
||||
.\" .fi enable filling
|
||||
.\" .br insert line break
|
||||
.\" .sp <n> insert n+1 empty lines
|
||||
.\" for manpage-specific macros, see man(7)
|
||||
.SH NAME
|
||||
pylice \- program to do something
|
||||
.SH SYNOPSIS
|
||||
.B pylice
|
||||
.RI [ options ] " files" ...
|
||||
.br
|
||||
.B bar
|
||||
.RI [ options ] " files" ...
|
||||
.SH DESCRIPTION
|
||||
This manual page documents briefly the
|
||||
.B pylice
|
||||
and
|
||||
.B bar
|
||||
commands.
|
||||
This manual page was written for the Debian GNU/Linux distribution
|
||||
because the original program does not have a manual page.
|
||||
Instead, it has documentation in the GNU Info format; see below.
|
||||
.PP
|
||||
.\" TeX users may be more comfortable with the \fB<whatever>\fP and
|
||||
.\" \fI<whatever>\fP escape sequences to invode bold face and italics,
|
||||
.\" respectively.
|
||||
\fBpylice\fP is a program that...
|
||||
.SH OPTIONS
|
||||
These programs follow the usual GNU command line syntax, with long
|
||||
options starting with two dashes (`-').
|
||||
A summary of options is included below.
|
||||
For a complete description, see the Info files.
|
||||
.TP
|
||||
.B \-h, \-\-help
|
||||
Show summary of options.
|
||||
.TP
|
||||
.B \-v, \-\-version
|
||||
Show version of program.
|
||||
.SH SEE ALSO
|
||||
.BR bar (1),
|
||||
.BR baz (1).
|
||||
.br
|
||||
The programs are documented fully by
|
||||
.IR "The Rise and Fall of a Fooish Bar" ,
|
||||
available via the Info system.
|
||||
.SH AUTHOR
|
||||
This manual page was written by Bastian Kleineidam <calvin@mail.calvinsplayground.de>,
|
||||
for the Debian GNU/Linux system (but may be used by others).
|
||||
2
debian/menu.ex
vendored
Normal file
2
debian/menu.ex
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
?package(linkchecker):needs=X11|text|vc|wm section=Apps/see-menu-manual\
|
||||
  title="linkchecker" command="/usr/bin/linkchecker"
|
||||
7
debian/postinst.debhelper
vendored
Normal file
7
debian/postinst.debhelper
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Automatically added by dh_installdocs
|
||||
if [ "$1" = "configure" ]; then
|
||||
if [ -d /usr/doc -a ! -e /usr/doc/pylice -a -d /usr/share/doc/pylice ]; then
|
||||
ln -sf ../share/doc/pylice /usr/doc/pylice
|
||||
fi
|
||||
fi
|
||||
# End automatically added section
|
||||
47
debian/postinst.ex
vendored
Normal file
47
debian/postinst.ex
vendored
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
#! /bin/sh
|
||||
# postinst script for pylice
|
||||
#
|
||||
# see: dh_installdeb(1)
|
||||
|
||||
set -e
|
||||
|
||||
# summary of how this script can be called:
|
||||
# * <postinst> `configure' <most-recently-configured-version>
|
||||
# * <old-postinst> `abort-upgrade' <new version>
|
||||
# * <conflictor's-postinst> `abort-remove' `in-favour' <package>
|
||||
# <new-version>
|
||||
# * <deconfigured's-postinst> `abort-deconfigure' `in-favour'
|
||||
# <failed-install-package> <version> `removing'
|
||||
# <conflicting-package> <version>
|
||||
# for details, see /usr/doc/packaging-manual/
|
||||
#
|
||||
# quoting from the policy:
|
||||
# Any necessary prompting should almost always be confined to the
|
||||
# post-installation script, and should be protected with a conditional
|
||||
# so that unnecessary prompting doesn't happen if a package's
|
||||
# installation fails and the `postinst' is called with `abort-upgrade',
|
||||
# `abort-remove' or `abort-deconfigure'.
|
||||
|
||||
case "$1" in
|
||||
configure)
|
||||
|
||||
;;
|
||||
|
||||
abort-upgrade|abort-remove|abort-deconfigure)
|
||||
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "postinst called with unknown argument \`$1'" >&2
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
# dh_installdeb will replace this with shell code automatically
|
||||
# generated by other debhelper scripts.
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
exit 0
|
||||
|
||||
|
||||
38
debian/postrm.ex
vendored
Normal file
38
debian/postrm.ex
vendored
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#! /bin/sh
|
||||
# postrm script for pylice
|
||||
#
|
||||
# see: dh_installdeb(1)
|
||||
|
||||
set -e
|
||||
|
||||
# summary of how this script can be called:
|
||||
# * <postrm> `remove'
|
||||
# * <postrm> `purge'
|
||||
# * <old-postrm> `upgrade' <new-version>
|
||||
# * <new-postrm> `failed-upgrade' <old-version>
|
||||
# * <new-postrm> `abort-install'
|
||||
# * <new-postrm> `abort-install' <old-version>
|
||||
# * <new-postrm> `abort-upgrade' <old-version>
|
||||
# * <disappearer's-postrm> `disappear' <r>overwrit>r> <new-version>
|
||||
# for details, see /usr/doc/packaging-manual/
|
||||
|
||||
case "$1" in
|
||||
purge|remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear)
|
||||
|
||||
# update the menu system
|
||||
# if [ -x /usr/bin/update-menus ]; then update-menus; fi
|
||||
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "postrm called with unknown argument \`$1'" >&2
|
||||
exit 0
|
||||
|
||||
esac
|
||||
|
||||
# dh_installdeb will replace this with shell code automatically
|
||||
# generated by other debhelper scripts.
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
|
||||
40
debian/preinst.ex
vendored
Normal file
40
debian/preinst.ex
vendored
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#! /bin/sh
|
||||
# preinst script for pylice
|
||||
#
|
||||
# see: dh_installdeb(1)
|
||||
|
||||
set -e
|
||||
|
||||
# summary of how this script can be called:
|
||||
# * <new-preinst> `install'
|
||||
# * <new-preinst> `install' <old-version>
|
||||
# * <new-preinst> `upgrade' <old-version>
|
||||
# * <old-preinst> `abort-upgrade' <new-version>
|
||||
|
||||
case "$1" in
|
||||
install|upgrade)
|
||||
# if [ "$1" = "upgrade" ]
|
||||
# then
|
||||
# start-stop-daemon --stop --quiet --oknodo \
|
||||
# --pidfile /var/run/pylice.pid \
|
||||
# --exec /usr/sbin/pylice 2>/dev/null || true
|
||||
# fi
|
||||
;;
|
||||
|
||||
abort-upgrade)
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "preinst called with unknown argument \`$1'" >&2
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
# dh_installdeb will replace this with shell code automatically
|
||||
# generated by other debhelper scripts.
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
exit 0
|
||||
|
||||
|
||||
5
debian/prerm.debhelper
vendored
Normal file
5
debian/prerm.debhelper
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Automatically added by dh_installdocs
|
||||
if [ \( "$1" = "upgrade" -o "$1" = "remove" \) -a -L /usr/doc/pylice ]; then
|
||||
rm -f /usr/doc/pylice
|
||||
fi
|
||||
# End automatically added section
|
||||
37
debian/prerm.ex
vendored
Normal file
37
debian/prerm.ex
vendored
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
#! /bin/sh
|
||||
# prerm script for pylice
|
||||
#
|
||||
# see: dh_installdeb(1)
|
||||
|
||||
set -e
|
||||
|
||||
# summary of how this script can be called:
|
||||
# * <prerm> `remove'
|
||||
# * <old-prerm> `upgrade' <new-version>
|
||||
# * <new-prerm> `failed-upgrade' <old-version>
|
||||
# * <conflictor's-prerm> `remove' `in-favour' <package> <new-version>
|
||||
# * <deconfigured's-prerm> `deconfigure' `in-favour'
|
||||
# <package-being-installed> <version> `removing'
|
||||
# <conflicting-package> <version>
|
||||
# for details, see /usr/doc/packaging-manual/
|
||||
|
||||
case "$1" in
|
||||
remove|upgrade|deconfigure)
|
||||
# install-info --quiet --remove /usr/info/pylice.info.gz
|
||||
;;
|
||||
failed-upgrade)
|
||||
;;
|
||||
*)
|
||||
echo "prerm called with unknown argument \`$1'" >&2
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
# dh_installdeb will replace this with shell code automatically
|
||||
# generated by other debhelper scripts.
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
exit 0
|
||||
|
||||
|
||||
76
debian/rules
vendored
Executable file
76
debian/rules
vendored
Executable file
|
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/make -f
|
||||
# Sample debian/rules that uses debhelper.
|
||||
# GNU copyright 1997 to 1999 by Joey Hess.
|
||||
|
||||
# Uncomment this to turn on verbose mode.
|
||||
#export DH_VERBOSE=1
|
||||
|
||||
# This is the debhelper compatability version to use.
|
||||
export DH_COMPAT=1
|
||||
|
||||
build: build-stamp
|
||||
build-stamp:
|
||||
dh_testdir
|
||||
|
||||
|
||||
# Add here commands to compile the package.
|
||||
$(MAKE)
|
||||
|
||||
touch build-stamp
|
||||
|
||||
clean:
|
||||
dh_testdir
|
||||
# dh_testroot
|
||||
rm -f build-stamp
|
||||
|
||||
# Add here commands to clean up after the build process.
|
||||
-$(MAKE) clean
|
||||
|
||||
dh_clean
|
||||
|
||||
install: build
|
||||
dh_testdir
|
||||
# dh_testroot
|
||||
dh_clean -k
|
||||
# dh_installdirs
|
||||
# Add here commands to install the package into debian/tmp.
|
||||
$(MAKE) install DESTDIR=`pwd`/debian/tmp
|
||||
|
||||
|
||||
# Build architecture-independent files here.
|
||||
binary-indep: build install
|
||||
# We have nothing to do by default.
|
||||
|
||||
# Build architecture-dependent files here.
|
||||
binary-arch: build install
|
||||
# dh_testversion
|
||||
dh_testdir
|
||||
# dh_testroot
|
||||
# dh_installdebconf
|
||||
dh_installdocs
|
||||
# dh_installexamples
|
||||
# dh_installmenu
|
||||
# dh_installemacsen
|
||||
# dh_installpam
|
||||
# dh_installinit
|
||||
# dh_installcron
|
||||
# dh_installmanpages
|
||||
# dh_installinfo
|
||||
dh_undocumented
|
||||
# dh_installchangelogs
|
||||
# dh_link
|
||||
# dh_strip
|
||||
dh_compress
|
||||
dh_fixperms
|
||||
# You may want to make some executables suid here.
|
||||
# dh_suidregister
|
||||
# dh_makeshlibs
|
||||
dh_installdeb
|
||||
# dh_perl
|
||||
# dh_shlibdeps
|
||||
dh_gencontrol
|
||||
dh_md5sums
|
||||
dh_builddeb
|
||||
|
||||
binary: binary-indep binary-arch
|
||||
.PHONY: build clean binary-indep binary-arch binary install
|
||||
5
debian/watch.ex
vendored
Normal file
5
debian/watch.ex
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Example watch control file for uscan
|
||||
# Rename this file to "watch" and then you can run the "uscan" command
|
||||
# to check for upstream updates and more.
|
||||
# Site Directory Pattern Version Script
|
||||
sunsite.unc.edu /pub/Linux/Incoming pylice-(.*)\.tar\.gz debian uupdate
|
||||
382
httplib.py
Normal file
382
httplib.py
Normal file
|
|
@ -0,0 +1,382 @@
|
|||
#
|
||||
# HTTP/1.1 client library
|
||||
#
|
||||
# Copyright (C) 1998-1999 Guido van Rossum. All Rights Reserved.
|
||||
# Written by Greg Stein. Given to Guido. Licensed using the Python license.
|
||||
#
|
||||
# This module is maintained by Greg and is available at:
|
||||
# http://www.lyra.org/greg/python/httplib.py
|
||||
#
|
||||
# Since this isn't in the Python distribution yet, we'll use the CVS ID
|
||||
# for tracking:
|
||||
# $Id$
|
||||
#
|
||||
|
||||
import socket,string,mimetools,httplib
|
||||
|
||||
|
||||
error = __name__ + '.error'
|
||||
|
||||
HTTP_PORT = 80
|
||||
|
||||
class HTTPResponse(mimetools.Message):
|
||||
def __init__(self, fp, version, errcode):
|
||||
mimetools.Message.__init__(self, fp, 0)
|
||||
|
||||
if version == 'HTTP/1.0':
|
||||
self.version = 10
|
||||
elif version[:7] == 'HTTP/1.':
|
||||
self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
|
||||
else:
|
||||
raise error, 'unknown HTTP protocol'
|
||||
|
||||
# are we using the chunked-style of transfer encoding?
|
||||
tr_enc = self.getheader('transfer-encoding')
|
||||
if tr_enc:
|
||||
if string.lower(tr_enc) != 'chunked':
|
||||
raise error, 'unknown transfer-encoding'
|
||||
self.chunked = 1
|
||||
self.chunk_left = None
|
||||
else:
|
||||
self.chunked = 0
|
||||
|
||||
# will the connection close at the end of the response?
|
||||
conn = self.getheader('connection')
|
||||
if conn:
|
||||
conn = string.lower(conn)
|
||||
# a "Connection: close" will always close the connection. if we
|
||||
# don't see that and this is not HTTP/1.1, then the connection will
|
||||
# close unless we see a Keep-Alive header.
|
||||
self.will_close = string.find(conn, 'close') != -1 or \
|
||||
( self.version != 11 and \
|
||||
not self.getheader('keep-alive') )
|
||||
else:
|
||||
# for HTTP/1.1, the connection will always remain open
|
||||
# otherwise, it will remain open IFF we see a Keep-Alive header
|
||||
self.will_close = self.version != 11 and \
|
||||
not self.getheader('keep-alive')
|
||||
|
||||
# do we have a Content-Length?
|
||||
# NOTE: RFC 2616, S4.4, #3 states we ignore this if tr_enc is "chunked"
|
||||
length = self.getheader('content-length')
|
||||
if length and not self.chunked:
|
||||
self.length = int(length)
|
||||
else:
|
||||
self.length = None
|
||||
|
||||
# does the body have a fixed length? (of zero)
|
||||
if (errcode == 204 or # No Content
|
||||
errcode == 304 or # Not Modified
|
||||
100 <= errcode < 200): # 1xx codes
|
||||
self.length = 0
|
||||
|
||||
# if the connection remains open, and we aren't using chunked, and
|
||||
# a content-length was not provided, then assume that the connection
|
||||
# WILL close.
|
||||
if not self.will_close and \
|
||||
not self.chunked and \
|
||||
self.length is None:
|
||||
self.will_close = 1
|
||||
|
||||
|
||||
def close(self):
|
||||
if self.fp:
|
||||
self.fp.close()
|
||||
self.fp = None
|
||||
|
||||
|
||||
def isclosed(self):
|
||||
# NOTE: it is possible that we will not ever call self.close(). This
|
||||
# case occurs when will_close is TRUE, length is None, and we
|
||||
# read up to the last byte, but NOT past it.
|
||||
#
|
||||
# IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
|
||||
# called, meaning self.isclosed() is meaningful.
|
||||
return self.fp is None
|
||||
|
||||
|
||||
def read(self, amt=None):
|
||||
if not self.fp:
|
||||
return ''
|
||||
|
||||
if self.chunked:
|
||||
chunk_left = self.chunk_left
|
||||
value = ''
|
||||
while 1:
|
||||
if not chunk_left:
|
||||
line = self.fp.readline()
|
||||
i = string.find(line, ';')
|
||||
if i >= 0:
|
||||
line = line[:i] # strip chunk-extensions
|
||||
chunk_left = string.atoi(line, 16)
|
||||
if chunk_left == 0:
|
||||
break
|
||||
if not amt:
|
||||
value = value + self.fp.read(chunk_left)
|
||||
elif amt < chunk_left:
|
||||
value = value + self.fp.read(amt)
|
||||
self.chunk_left = chunk_left - amt
|
||||
return value
|
||||
elif amt == chunk_left:
|
||||
value = value + self.fp.read(amt)
|
||||
self.fp.read(2) # toss the CRLF at the end of the chunk
|
||||
self.chunk_left = None
|
||||
return value
|
||||
else:
|
||||
value = value + self.fp.read(chunk_left)
|
||||
amt = amt - chunk_left
|
||||
|
||||
# we read the whole chunk, get another
|
||||
self.fp.read(2) # toss the CRLF at the end of the chunk
|
||||
chunk_left = None
|
||||
|
||||
# read and discard trailer up to the CRLF terminator
|
||||
### note: we shouldn't have any trailers!
|
||||
while 1:
|
||||
line = self.fp.readline()
|
||||
if line == '\r\n':
|
||||
break
|
||||
|
||||
# we read everything; close the "file"
|
||||
self.close()
|
||||
|
||||
return value
|
||||
|
||||
elif not amt:
|
||||
# unbounded read
|
||||
if self.will_close:
|
||||
s = self.fp.read()
|
||||
else:
|
||||
s = self.fp.read(self.length)
|
||||
self.close() # we read everything
|
||||
return s
|
||||
|
||||
if self.length is not None:
|
||||
if amt > self.length:
|
||||
# clip the read to the "end of response"
|
||||
amt = self.length
|
||||
self.length = self.length - amt
|
||||
|
||||
s = self.fp.read(amt)
|
||||
|
||||
# close our "file" if we know we should
|
||||
### I'm not sure about the len(s) < amt part; we should be safe because
|
||||
### we shouldn't be using non-blocking sockets
|
||||
if self.length == 0 or len(s) < amt:
|
||||
self.close()
|
||||
|
||||
return s
|
||||
|
||||
|
||||
class HTTPConnection:
|
||||
|
||||
_http_vsn = 11
|
||||
_http_vsn_str = 'HTTP/1.1'
|
||||
|
||||
response_class = HTTPResponse
|
||||
|
||||
def __init__(self, host, port=None):
|
||||
self.sock = None
|
||||
self.response = None
|
||||
self._set_hostport(host, port)
|
||||
|
||||
def _set_hostport(self, host, port):
|
||||
if port is None:
|
||||
i = string.find(host, ':')
|
||||
if i >= 0:
|
||||
port = int(host[i+1:])
|
||||
host = host[:i]
|
||||
else:
|
||||
port = HTTP_PORT
|
||||
self.host = host
|
||||
self.port = port
|
||||
|
||||
def connect(self):
|
||||
"""Connect to the host and port specified in __init__."""
|
||||
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
self.sock.connect(self.host, self.port)
|
||||
|
||||
def close(self):
|
||||
"""Close the connection to the HTTP server."""
|
||||
if self.sock:
|
||||
self.sock.close() # close it manually... there may be other refs
|
||||
self.sock = None
|
||||
if self.response:
|
||||
self.response.close()
|
||||
self.response = None
|
||||
|
||||
def send(self, str):
|
||||
"""Send `str' to the server."""
|
||||
if not self.sock:
|
||||
self.connect()
|
||||
|
||||
# send the data to the server. if we get a broken pipe, then close
|
||||
# the socket. we want to reconnect when somebody tries to send again.
|
||||
#
|
||||
# NOTE: we DO propagate the error, though, because we cannot simply
|
||||
# ignore the error... the caller will know if they can retry.
|
||||
try:
|
||||
self.sock.send(str)
|
||||
except socket.error, v:
|
||||
if v[0] == 32: # Broken pipe
|
||||
self.close()
|
||||
raise
|
||||
|
||||
def putrequest(self, method, url='/'):
|
||||
"""Send a request to the server.
|
||||
|
||||
`method' specifies an HTTP request method, e.g. 'GET'.
|
||||
`url' specifies the object being requested, e.g.
|
||||
'/index.html'.
|
||||
"""
|
||||
if self.response:
|
||||
if not self.response.isclosed():
|
||||
### implies half-duplex!
|
||||
raise error, 'prior response has not been fully handled'
|
||||
self.response = None
|
||||
|
||||
if not url:
|
||||
url = '/'
|
||||
str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
|
||||
|
||||
try:
|
||||
self.send(str)
|
||||
except socket.error, v:
|
||||
if v[0] != 32: # Broken pipe
|
||||
raise
|
||||
# try one more time (the socket was closed; this will reopen)
|
||||
self.send(str)
|
||||
|
||||
self.putheader('Host', self.host)
|
||||
|
||||
if self._http_vsn == 11:
|
||||
# Issue some standard headers for better HTTP/1.1 compliance
|
||||
|
||||
# note: we are assuming that clients will not attempt to set these
|
||||
# headers since *this* library must deal with the consequences.
|
||||
# this also means that when the supporting libraries are
|
||||
# updated to recognize other forms, then this code should be
|
||||
# changed (removed or updated).
|
||||
|
||||
# we only want a Content-Encoding of "identity" since we don't
|
||||
# support encodings such as x-gzip or x-deflate.
|
||||
self.putheader('Accept-Encoding', 'identity')
|
||||
|
||||
# we can accept "chunked" Transfer-Encodings, but no others
|
||||
# NOTE: no TE header implies *only* "chunked"
|
||||
#self.putheader('TE', 'chunked')
|
||||
|
||||
# if TE is supplied in the header, then it must appear in a
|
||||
# Connection header.
|
||||
#self.putheader('Connection', 'TE')
|
||||
|
||||
else:
|
||||
# For HTTP/1.0, the server will assume "not chunked"
|
||||
pass
|
||||
|
||||
def putheader(self, header, value):
|
||||
"""Send a request header line to the server.
|
||||
|
||||
For example: h.putheader('Accept', 'text/html')
|
||||
"""
|
||||
str = '%s: %s\r\n' % (header, value)
|
||||
self.send(str)
|
||||
|
||||
def endheaders(self):
|
||||
"""Indicate that the last header line has been sent to the server."""
|
||||
|
||||
self.send('\r\n')
|
||||
|
||||
def request(self, method, url='/', body=None, headers={}):
|
||||
"""Send a complete request to the server."""
|
||||
|
||||
self.putrequest(method, url)
|
||||
|
||||
if body:
|
||||
self.putheader('Content-Length', str(len(body)))
|
||||
for hdr, value in headers.items():
|
||||
self.putheader(hdr, value)
|
||||
self.endheaders()
|
||||
|
||||
if body:
|
||||
self.send(body)
|
||||
|
||||
def getreply(self):
|
||||
"""Get a reply from the server.
|
||||
|
||||
Returns a tuple consisting of:
|
||||
- server response code (e.g. '200' if all goes well)
|
||||
- server response string corresponding to response code
|
||||
- any RFC822 headers in the response from the server
|
||||
|
||||
"""
|
||||
file = self.sock.makefile('rb')
|
||||
line = file.readline()
|
||||
try:
|
||||
[ver, code, msg] = string.split(line, None, 2)
|
||||
except ValueError:
|
||||
try:
|
||||
[ver, code] = string.split(line, None, 1)
|
||||
msg = ""
|
||||
except ValueError:
|
||||
self.close()
|
||||
return -1, line, file
|
||||
if ver[:5] != 'HTTP/':
|
||||
self.close()
|
||||
return -1, line, file
|
||||
errcode = int(code)
|
||||
errmsg = string.strip(msg)
|
||||
response = self.response_class(file, ver, errcode)
|
||||
if response.will_close:
|
||||
# this effectively passes the connection to the response
|
||||
self.close()
|
||||
else:
|
||||
# remember this, so we can tell when it is complete
|
||||
self.response = response
|
||||
return errcode, errmsg, response
|
||||
|
||||
|
||||
class HTTP(HTTPConnection):
|
||||
"Compatibility class with httplib.py from 1.5."
|
||||
|
||||
_http_vsn = 10
|
||||
_http_vsn_str = 'HTTP/1.0'
|
||||
|
||||
def __init__(self, host='', port=None):
|
||||
"Provide a default host, since the superclass requires one."
|
||||
|
||||
# Note that we may pass an empty string as the host; this will throw
|
||||
# an error when we attempt to connect. Presumably, the client code
|
||||
# will call connect before then, with a proper host.
|
||||
HTTPConnection.__init__(self, host, port)
|
||||
|
||||
def connect(self, host=None, port=None):
|
||||
"Accept arguments to set the host/port, since the superclass doesn't."
|
||||
|
||||
if host:
|
||||
self._set_hostport(host, port)
|
||||
HTTPConnection.connect(self)
|
||||
|
||||
def set_debuglevel(self, debuglevel):
|
||||
"The class no longer supports the debuglevel."
|
||||
pass
|
||||
|
||||
def getfile(self):
|
||||
"Provide a getfile, since the superclass' use of HTTP/1.1 prevents it."
|
||||
return self.file
|
||||
|
||||
def putheader(self, header, *values):
|
||||
"The superclass allows only one value argument."
|
||||
HTTPConnection.putheader(self, header, string.joinfields(values,'\r\n\t'))
|
||||
|
||||
def getreply(self):
|
||||
"Compensate for an instance attribute shuffling."
|
||||
errcode, errmsg, response = HTTPConnection.getreply(self)
|
||||
if errcode == -1:
|
||||
self.file = response # response is the "file" when errcode==-1
|
||||
self.headers = None
|
||||
return -1, errmsg, None
|
||||
|
||||
self.headers = response
|
||||
self.file = response.fp
|
||||
return errcode, errmsg, response
|
||||
87
lc.cgi
Executable file
87
lc.cgi
Executable file
|
|
@ -0,0 +1,87 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import re,cgi,sys,urlparse,time,os
|
||||
|
||||
# configuration
|
||||
sys.stderr = sys.stdout
|
||||
cgi_dir = "/home/calvin/public_html/cgi-bin"
|
||||
dist_dir = "/home/calvin/linkchecker-1.1.0"
|
||||
lc = pylice_dir + "/pylice"
|
||||
sys.path.insert(0,dist_dir)
|
||||
cgi.logfile = cgi_dir + "/lc.log"
|
||||
|
||||
def testit():
|
||||
cgi.test()
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
def checkform():
|
||||
for key in ["level","url"]:
|
||||
if not form.has_key(key) or form[key].value == "": return 0
|
||||
if not re.match(r"^http://[-\w./~]+$", form["url"].value): return 0
|
||||
if not re.match(r"\d", form["level"].value): return 0
|
||||
if int(form["level"].value) > 3: return 0
|
||||
if form.has_key("anchors"):
|
||||
if not form["anchors"].value=="on": return 0
|
||||
if form.has_key("errors"):
|
||||
if not form["errors"].value=="on": return 0
|
||||
if form.has_key("intern"):
|
||||
if not form["intern"].value=="on": return 0
|
||||
return 1
|
||||
|
||||
|
||||
def getHostName():
|
||||
return urlparse.urlparse(form["url"].value)[1]
|
||||
|
||||
|
||||
def logit():
|
||||
logfile = open("/home/calvin/log/linkchecker.log","a")
|
||||
logfile.write("\n"+time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))+"\n")
|
||||
for var in ["HTTP_USER_AGENT","REMOTE_ADDR","REMOTE_HOST","REMOTE_PORT"]:
|
||||
if os.environ.has_key(var):
|
||||
logfile.write(var+"="+os.environ[var]+"\n")
|
||||
for key in ["level","url","anchors","errors","intern"]:
|
||||
if form.has_key(key):
|
||||
logfile.write(str(form[key])+"\n")
|
||||
logfile.close()
|
||||
|
||||
|
||||
def printError():
|
||||
print """<html><head></head>
|
||||
<body text="#192c83" bgcolor="#fff7e5" link="#191c83" vlink="#191c83"
|
||||
alink="#191c83" >
|
||||
<blockquote>
|
||||
<b>Error</b><br>
|
||||
The LinkChecker Online script has encountered an error. Please ensure
|
||||
that your provided URL link begins with <code>http://</code> and
|
||||
contains only these characters: <code>A-Za-z0-9./_~-</code><br><br>
|
||||
Errors are logged.
|
||||
</blockquote>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
# main
|
||||
print "Content-type: text/html"
|
||||
print
|
||||
#testit()
|
||||
form = cgi.FieldStorage()
|
||||
if not checkform():
|
||||
logit()
|
||||
printError()
|
||||
sys.exit(0)
|
||||
args=["", "-H", "-r "+form["level"].value, "-s"]
|
||||
if form.has_key("anchors"):
|
||||
args.append("-a")
|
||||
if not form.has_key("errors"):
|
||||
args.append("-v")
|
||||
if form.has_key("intern"):
|
||||
args.append("--intern=^(ftp|http)://"+getHostName())
|
||||
else:
|
||||
args.append("--extern=^file:")
|
||||
args.append("--intern=.+")
|
||||
|
||||
args.append(form["url"].value)
|
||||
sys.argv = args
|
||||
execfile(lc)
|
||||
|
||||
307
linkcheck/Config.py
Normal file
307
linkcheck/Config.py
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
import ConfigParser,sys,os,re,UserDict
|
||||
from os.path import expanduser,normpath,normcase,join,isfile
|
||||
import Logging
|
||||
|
||||
Version = "1.1.0"
|
||||
AppName = "LinkChecker"
|
||||
App = AppName+" "+Version
|
||||
UserAgent = AppName+"/"+Version
|
||||
Author = "Bastian Kleineidam"
|
||||
Copyright = "Copyright © 2000 by "+Author
|
||||
HtmlCopyright = "Copyright © 2000 by "+Author
|
||||
AppInfo = App+" "+Copyright
|
||||
HtmlAppInfo = App+", "+HtmlCopyright
|
||||
Url = "http://pylice.sourceforge.net/"
|
||||
Email = "calvin@users.sourceforge.net"
|
||||
Freeware = AppName+""" comes with ABSOLUTELY NO WARRANTY!
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions. Look at the file `LICENSE' whithin this
|
||||
distribution."""
|
||||
Loggers = {"text": Logging.StandardLogger,
|
||||
"html": Logging.HtmlLogger,
|
||||
"colored": Logging.ColoredLogger,
|
||||
"gml": Logging.GMLLogger,
|
||||
"sql": Logging.SQLLogger}
|
||||
LoggerKeys = reduce(lambda x, y: x+", "+y, Loggers.keys())
|
||||
DebugDelim = "==========================================================\n"
|
||||
DebugFlag = 0
|
||||
|
||||
# note: debugging with more than 1 thread can be painful
|
||||
def debug(msg):
|
||||
if DebugFlag:
|
||||
sys.stderr.write(msg)
|
||||
sys.stderr.flush()
|
||||
|
||||
|
||||
def _norm(path):
|
||||
return normcase(normpath(expanduser(path)))
|
||||
|
||||
|
||||
class Configuration(UserDict.UserDict):
|
||||
def __init__(self):
|
||||
UserDict.UserDict.__init__(self)
|
||||
self.data["log"] = Loggers["text"]()
|
||||
self.data["verbose"] = 0
|
||||
self.data["warnings"] = 0
|
||||
self.data["anchors"] = 0
|
||||
self.data["externlinks"] = []
|
||||
self.data["internlinks"] = []
|
||||
self.data["allowdeny"] = 0
|
||||
self.data["user"] = "anonymous"
|
||||
self.data["password"] = "joe@"
|
||||
self.data["proxy"] = 0
|
||||
self.data["proxyport"] = 8080
|
||||
self.data["recursionlevel"] = 1
|
||||
self.data["robotstxt"] = 0
|
||||
self.data["strict"] = 0
|
||||
self.data["fileoutput"] = []
|
||||
self.data["quiet"] = 0
|
||||
self.urlCache = {}
|
||||
self.robotsTxtCache = {}
|
||||
try:
|
||||
from threading import *
|
||||
self.enableThreading(5)
|
||||
except:
|
||||
type, value = sys.exc_info()[:2]
|
||||
self.disableThreading()
|
||||
|
||||
def disableThreading(self):
|
||||
self.data["threads"] = 0
|
||||
self.hasMoreUrls = self.hasMoreUrls_NoThreads
|
||||
self.finished = self.finished_NoThreads
|
||||
self.finish = self.finish_NoThreads
|
||||
self.appendUrl = self.appendUrl_NoThreads
|
||||
self.getUrl = self.getUrl_NoThreads
|
||||
self.checkUrl = self.checkUrl_NoThreads
|
||||
self.urlCache_has_key = self.urlCache_has_key_NoThreads
|
||||
self.urlCache_get = self.urlCache_get_NoThreads
|
||||
self.urlCache_set = self.urlCache_set_NoThreads
|
||||
self.robotsTxtCache_has_key = self.robotsTxtCache_has_key_NoThreads
|
||||
self.robotsTxtCache_get = self.robotsTxtCache_get_NoThreads
|
||||
self.robotsTxtCache_set = self.robotsTxtCache_set_NoThreads
|
||||
self.log_newUrl = self.log_newUrl_NoThreads
|
||||
self.urls = []
|
||||
self.threader = None
|
||||
|
||||
def enableThreading(self, num):
|
||||
import Queue,Threader
|
||||
from threading import Lock
|
||||
self.data["threads"] = 1
|
||||
self.hasMoreUrls = self.hasMoreUrls_Threads
|
||||
self.finished = self.finished_Threads
|
||||
self.finish = self.finish_Threads
|
||||
self.appendUrl = self.appendUrl_Threads
|
||||
self.getUrl = self.getUrl_Threads
|
||||
self.checkUrl = self.checkUrl_Threads
|
||||
self.urlCache_has_key = self.urlCache_has_key_Threads
|
||||
self.urlCache_get = self.urlCache_get_Threads
|
||||
self.urlCache_set = self.urlCache_set_Threads
|
||||
self.urlCacheLock = Lock()
|
||||
self.robotsTxtCache_has_key = self.robotsTxtCache_has_key_Threads
|
||||
self.robotsTxtCache_get = self.robotsTxtCache_get_Threads
|
||||
self.robotsTxtCache_set = self.robotsTxtCache_set_Threads
|
||||
self.robotsTxtCacheLock = Lock()
|
||||
self.log_newUrl = self.log_newUrl_Threads
|
||||
self.logLock = Lock()
|
||||
self.urls = Queue.Queue(0)
|
||||
self.threader = Threader.Threader(num)
|
||||
|
||||
def hasMoreUrls_NoThreads(self):
|
||||
return len(self.urls)
|
||||
|
||||
def finished_NoThreads(self):
|
||||
return not self.hasMoreUrls_NoThreads()
|
||||
|
||||
def finish_NoThreads(self):
|
||||
pass
|
||||
|
||||
def appendUrl_NoThreads(self, url):
|
||||
self.urls.append(url)
|
||||
|
||||
def getUrl_NoThreads(self):
|
||||
return self.urls.pop(0)
|
||||
|
||||
def checkUrl_NoThreads(self, url):
|
||||
url.check(self)
|
||||
|
||||
def urlCache_has_key_NoThreads(self, key):
|
||||
return self.urlCache.has_key(key)
|
||||
|
||||
def urlCache_get_NoThreads(self, key):
|
||||
return self.urlCache[key]
|
||||
|
||||
def urlCache_set_NoThreads(self, key, val):
|
||||
self.urlCache[key] = val
|
||||
|
||||
def robotsTxtCache_has_key_NoThreads(self, key):
|
||||
return self.robotsTxtCache.has_key(key)
|
||||
|
||||
def robotsTxtCache_get_NoThreads(self, key):
|
||||
return self.robotsTxtCache[key]
|
||||
|
||||
def robotsTxtCache_set_NoThreads(self, key, val):
|
||||
self.robotsTxtCache[key] = val
|
||||
|
||||
def log_newUrl_NoThreads(self, url):
|
||||
if not self.data["quiet"]: self.data["log"].newUrl(url)
|
||||
for log in self.data["fileoutput"]:
|
||||
log.newUrl(url)
|
||||
|
||||
def log_init(self):
|
||||
if not self.data["quiet"]: self.data["log"].init()
|
||||
for log in self.data["fileoutput"]:
|
||||
log.init()
|
||||
|
||||
def log_endOfOutput(self):
|
||||
if not self.data["quiet"]: self.data["log"].endOfOutput()
|
||||
for log in self.data["fileoutput"]:
|
||||
log.endOfOutput()
|
||||
|
||||
def hasMoreUrls_Threads(self):
|
||||
return not self.urls.empty()
|
||||
|
||||
def finished_Threads(self):
|
||||
self.threader.reduceThreads()
|
||||
return not self.hasMoreUrls() and self.threader.finished()
|
||||
|
||||
def finish_Threads(self):
|
||||
self.threader.finish()
|
||||
|
||||
def appendUrl_Threads(self, url):
|
||||
self.urls.put(url)
|
||||
|
||||
def getUrl_Threads(self):
|
||||
return self.urls.get()
|
||||
|
||||
def checkUrl_Threads(self, url):
|
||||
self.threader.startThread(url.check, (self,))
|
||||
|
||||
def urlCache_has_key_Threads(self, key):
|
||||
self.urlCacheLock.acquire()
|
||||
ret = self.urlCache.has_key(key)
|
||||
self.urlCacheLock.release()
|
||||
return ret
|
||||
|
||||
def urlCache_get_Threads(self, key):
|
||||
self.urlCacheLock.acquire()
|
||||
ret = self.urlCache[key]
|
||||
self.urlCacheLock.release()
|
||||
return ret
|
||||
|
||||
def urlCache_set_Threads(self, key, val):
|
||||
self.urlCacheLock.acquire()
|
||||
self.urlCache[key] = val
|
||||
self.urlCacheLock.release()
|
||||
|
||||
def robotsTxtCache_has_key_Threads(self, key):
|
||||
self.robotsTxtCacheLock.acquire()
|
||||
ret = self.robotsTxtCache.has_key(key)
|
||||
self.robotsTxtCacheLock.release()
|
||||
return ret
|
||||
|
||||
def robotsTxtCache_get_Threads(self, key):
|
||||
self.robotsTxtCacheLock.acquire()
|
||||
ret = self.robotsTxtCache[key]
|
||||
self.robotsTxtCacheLock.release()
|
||||
return ret
|
||||
|
||||
def robotsTxtCache_set_Threads(self, key, val):
|
||||
self.robotsTxtCacheLock.acquire()
|
||||
self.robotsTxtCache[key] = val
|
||||
self.robotsTxtCacheLock.release()
|
||||
|
||||
def log_newUrl_Threads(self, url):
|
||||
self.logLock.acquire()
|
||||
if not self.data["quiet"]: self.data["log"].newUrl(url)
|
||||
for log in self.data["fileoutput"]:
|
||||
log.newUrl(url)
|
||||
self.logLock.release()
|
||||
|
||||
def read(self, files = []):
|
||||
files.insert(0,_norm("~/.pylicerc"))
|
||||
if sys.platform=="win32":
|
||||
if not sys.path[0]:
|
||||
path=os.getcwd()
|
||||
else:
|
||||
path=sys.path[0]
|
||||
else:
|
||||
path="/etc"
|
||||
files.insert(0,_norm(join(path, "pylicerc")))
|
||||
if len(files):
|
||||
self.readConfig(files)
|
||||
|
||||
def warn(self, msg):
|
||||
self.message("Config: WARNING: "+msg)
|
||||
|
||||
def error(self, msg):
|
||||
self.message("Config: ERROR: "+msg)
|
||||
|
||||
def message(self, msg):
|
||||
sys.stderr.write(msg+"\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
def readConfig(self, files):
|
||||
try:
|
||||
cfgparser = ConfigParser.ConfigParser()
|
||||
cfgparser.read(files)
|
||||
except: return
|
||||
|
||||
section="output"
|
||||
try:
|
||||
log = cfgparser.get(section, "log")
|
||||
if Loggers.has_key(log):
|
||||
self.data["log"] = Loggers[log]()
|
||||
else:
|
||||
self.warn("invalid log option "+log)
|
||||
except: pass
|
||||
try:
|
||||
if cfgparser.getboolean(section, "verbose"):
|
||||
self.data["verbose"] = 1
|
||||
self.data["warnings"] = 1
|
||||
except: pass
|
||||
try: self.data["quiet"] = cfgparser.getboolean(section, "quiet")
|
||||
except: pass
|
||||
try: self.data["warnings"] = cfgparser.getboolean(section, "warnings")
|
||||
except: pass
|
||||
|
||||
section="checking"
|
||||
try:
|
||||
num = cfgparser.getint(section, "threads")
|
||||
if num<=0:
|
||||
self.disableThreads()
|
||||
else:
|
||||
self.enableThreads(num)
|
||||
except: pass
|
||||
try: self.data["anchors"] = cfgparser.getboolean(section, "anchors")
|
||||
except: pass
|
||||
try: self.data["externlinks"].append(re.compile(cfgparser.get(section, "externlinks")))
|
||||
except: pass
|
||||
try: self.data["internlinks"].append(re.compile(cfgparser.get(section, "internlinks")))
|
||||
except: pass
|
||||
try: self.data["allowdeny"] = cfgparser.getboolean(section, "allowdeny")
|
||||
except: pass
|
||||
try: self.data["password"] = cfgparser.get(section, "password")
|
||||
except: pass
|
||||
try: self.data["user"] = cfgparser.get(section, "user")
|
||||
except: pass
|
||||
try:
|
||||
self.data["proxy"] = cfgparser.get(section, "proxy")
|
||||
self.data["proxyport"] = cfgparser.getint(section, "proxyport")
|
||||
except: pass
|
||||
try:
|
||||
num = cfgparser.getint(section, "recursionlevel")
|
||||
if num<0:
|
||||
self.error("illegal recursionlevel number: "+`num`)
|
||||
self.data["recursionlevel"] = num
|
||||
except: pass
|
||||
try: self.data["robotstxt"] = cfgparser.getboolean(section, "robotstxt")
|
||||
except: pass
|
||||
try: self.data["strict"] = cfgparser.getboolean(section, "strict")
|
||||
except: pass
|
||||
try:
|
||||
filelist = string.split(cfgparser.get(section, "fileoutput"))
|
||||
for arg in filelist:
|
||||
if Loggers.has_key(arg):
|
||||
self.data["fileoutput"].append(Loggers[arg](open("pylice-out."+arg, "w")))
|
||||
except:
|
||||
pass
|
||||
48
linkcheck/FileUrlData.py
Normal file
48
linkcheck/FileUrlData.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import re,string,os,urlparse
|
||||
from UrlData import UrlData
|
||||
from os.path import normpath
|
||||
|
||||
class FileUrlData(UrlData):
    """Url link with file scheme.

    Normalizes a bare local path into a file:// URL at construction
    time; only top-level URLs (no parent, no base) are rewritten.
    """

    def __init__(self,
                 urlName,
                 recursionLevel,
                 parentName = None,
                 baseRef = None, line=0, _time=0):
        UrlData.__init__(self,
                         urlName,
                         recursionLevel,
                         parentName,
                         baseRef, line, _time)
        # only rewrite stand-alone names that do not already carry a scheme
        if parentName or baseRef or \
           re.compile("^file:").search(self.urlName):
            return
        drive_re = re.compile("^[a-zA-Z]:")
        if drive_re.search(self.urlName):
            # "c:..." style path
            self.adjustWindozePath()
        else:
            # relative path: anchor it at the current working directory
            if self.urlName[0:1] != "/":
                self.urlName = os.getcwd()+"/"+self.urlName
                # cwd itself may be a drive-letter path on Windows
                if drive_re.search(self.urlName):
                    self.adjustWindozePath()
        self.urlName = "file://"+normpath(self.urlName)

    def buildUrl(self):
        UrlData.buildUrl(self)
        # cut off parameter, query and fragment
        self.url = urlparse.urlunparse(self.urlTuple[:3] + ('','',''))

    def adjustWindozePath(self):
        "c:\\windows ==> /c|\\windows"
        self.urlName = "/"+self.urlName[0]+"|"+self.urlName[2:]

    def isHtml(self):
        # .htm, .html, .shtm, .shtml count as HTML content
        return self.valid and re.compile("\.s?html?$").search(self.url)

    def __str__(self):
        return "File link\n"+UrlData.__str__(self)
|
||||
|
||||
26
linkcheck/FtpUrlData.py
Normal file
26
linkcheck/FtpUrlData.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import ftplib
|
||||
from UrlData import UrlData
|
||||
|
||||
class FtpUrlData(UrlData):
    """Url link with ftp scheme.

    Connects with the configured user/password and records the
    server's welcome banner as the link info.
    """

    def checkConnection(self, config):
        self.urlConnection = ftplib.FTP(self.urlTuple[1],
                                        config["user"], config["password"])
        banner = self.urlConnection.getwelcome()
        if not banner:
            # a silent server counts as a failure
            self.closeConnection()
            raise Exception("Got no answer from FTP server")
        self.setInfo(banner)

    def closeConnection(self):
        # QUIT may itself fail on a dead connection; ignore that
        try:
            self.urlConnection.quit()
        except:
            pass
        self.urlConnection = None

    def __str__(self):
        return "FTP link\n"+UrlData.__str__(self)
|
||||
|
||||
|
||||
9
linkcheck/GopherUrlData.py
Normal file
9
linkcheck/GopherUrlData.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from UrlData import UrlData
|
||||
|
||||
class GopherUrlData(UrlData):
    """Url link with gopher scheme; inherits all checking behavior."""

    def __str__(self):
        return "Gopher link\n"+UrlData.__str__(self)
|
||||
|
||||
|
||||
33
linkcheck/HostCheckingUrlData.py
Normal file
33
linkcheck/HostCheckingUrlData.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import socket,string
|
||||
from UrlData import UrlData
|
||||
|
||||
class HostCheckingUrlData(UrlData):
    """Url link for which we have to connect to a specific host.

    Subclasses fill in self.host; checking succeeds when the host
    name resolves.
    """

    def __init__(self,
                 urlName,
                 recursionLevel,
                 parentName = None,
                 baseRef = None, line=0, _time=0):
        UrlData.__init__(self, urlName, recursionLevel, parentName, baseRef,
                         line, _time)
        self.host = None
        self.url = urlName

    def buildUrl(self):
        # to avoid anchor checking
        self.urlTuple = None

    def getCacheKey(self):
        # host-based links are cached per host, not per URL
        return self.host

    def checkConnection(self, config):
        resolved = socket.gethostbyname(self.host)
        self.setValid(self.host+"("+resolved+") found")

    def closeConnection(self):
        UrlData.closeConnection(self)

    def __str__(self):
        return "host="+repr(self.host)+"\n"+UrlData.__str__(self)
|
||||
|
||||
150
linkcheck/HttpUrlData.py
Normal file
150
linkcheck/HttpUrlData.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
import httplib,urlparse,sys,base64,time
|
||||
from UrlData import UrlData
|
||||
from RobotsTxt import RobotsTxt
|
||||
import Config,StringUtil
|
||||
|
||||
class HttpUrlData(UrlData):
    "Url link with http scheme"

    def checkConnection(self, config):
        """Check a URL with HTTP protocol.

        Issues a HEAD request, retries once with Basic authentication
        on 401, follows up to 5 redirects (301/302), and records the
        final status as valid (<400) or error (>=400).  2xx = success,
        3xx = redirection, 4xx = client error, 5xx = server error
        (RFC 1945, section 9).
        """
        self.mime = None
        self.auth = None
        self.proxy = config["proxy"]
        self.proxyport = config["proxyport"]
        if config["robotstxt"] and not self.robotsTxtAllowsUrl(config):
            self.setWarning("Access denied by robots.txt, checked only syntax")
            return

        status, statusText, self.mime = self.getHttpRequest()
        Config.debug(str(self.mime))
        if status == 401:
            # BUGFIX: the original referenced LinkChecker.User/Password,
            # which are undefined names in this module; use the config
            # credentials, consistent with FtpUrlData.checkConnection.
            # NOTE(review): base64.encodestring appends a newline to the
            # token -- confirm the target servers tolerate this.
            self.auth = base64.encodestring(config["user"]+":"+config["password"])
            status, statusText, self.mime = self.getHttpRequest()
            if status >= 400:
                self.setError(repr(status)+" "+statusText)
                return

        # follow redirections and set self.url to the effective url
        tries = 0
        redirected = self.urlName
        while status in [301,302] and self.mime and tries < 5:
            redirected = urlparse.urljoin(redirected, self.mime.getheader("Location"))
            self.urlTuple = urlparse.urlparse(redirected)
            status, statusText, self.mime = self.getHttpRequest()
            Config.debug("\nRedirected\n"+str(self.mime))
            tries = tries + 1

        effectiveurl = urlparse.urlunparse(self.urlTuple)
        if self.url != effectiveurl:
            self.setWarning("Effective URL "+effectiveurl)
            self.url = effectiveurl

        # check final result
        if status == 204:
            # No Content is reported but still counts as reachable
            self.setWarning(statusText)
        if status >= 400:
            self.setError(repr(status)+" "+statusText)
        else:
            self.setValid(repr(status)+" "+statusText)

    def getHttpRequest(self, method="HEAD"):
        "Put request and return (status code, status text, mime object)"
        if self.proxy:
            host = self.proxy+":"+repr(self.proxyport)
        else:
            host = self.urlTuple[1]
        if self.urlConnection:
            self.closeConnection()
        self.urlConnection = httplib.HTTP(host)
        if self.proxy:
            # a proxy needs the absolute URL as request path
            path = urlparse.urlunparse(self.urlTuple)
        else:
            path = self.urlTuple[2]
            # re-attach parameters and query string (anchor is dropped)
            if self.urlTuple[3] != "":
                path = path + ";" + self.urlTuple[3]
            if self.urlTuple[4] != "":
                path = path + "?" + self.urlTuple[4]
        self.urlConnection.putrequest(method, path)
        if self.auth:
            self.urlConnection.putheader("Authorization", "Basic "+self.auth)
        self.urlConnection.putheader("User-agent", Config.UserAgent)
        self.urlConnection.endheaders()
        return self.urlConnection.getreply()

    def getContent(self):
        "Fetch the page body with GET and time the download."
        self.closeConnection()
        t = time.time()
        self.getHttpRequest("GET")
        self.urlConnection = self.urlConnection.getfile()
        data = StringUtil.stripHtmlComments(self.urlConnection.read())
        self.time = time.time() - t
        return data

    def isHtml(self):
        if self.mime:
            return self.valid and self.mime.gettype()=="text/html"
        return 0

    def robotsTxtAllowsUrl(self, config):
        "Return 1 unless the host's robots.txt forbids this path."
        try:
            if config.robotsTxtCache_has_key(self.urlTuple[1]):
                robotsTxt = config.robotsTxtCache_get(self.urlTuple[1])
            else:
                robotsTxt = RobotsTxt(self.urlTuple[1], Config.UserAgent)
                Config.debug("DEBUG: "+str(robotsTxt)+"\n")
                config.robotsTxtCache_set(self.urlTuple[1], robotsTxt)
        except:
            # any failure fetching robots.txt means access is granted
            type, value = sys.exc_info()[:2]
            Config.debug("Heieiei: "+str(value)+"\n")
            return 1
        return robotsTxt.allowance(Config.UserAgent, self.urlTuple[2])

    def __str__(self):
        return "HTTP link\n"+UrlData.__str__(self)

    def closeConnection(self):
        if self.mime:
            try: self.mime.close()
            except: pass
            self.mime = None
        UrlData.closeConnection(self)
|
||||
13
linkcheck/HttpsUrlData.py
Normal file
13
linkcheck/HttpsUrlData.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from UrlData import UrlData
|
||||
|
||||
class HttpsUrlData(UrlData):
    """Url link with https scheme; not checked, only logged."""

    def check(self, config):
        # SSL checking is unsupported: warn and log, never connect
        self.setWarning("Https url ignored")
        self.logMe(config)

    def __str__(self):
        return "HTTPS link\n"+UrlData.__str__(self)
|
||||
|
||||
|
||||
13
linkcheck/JavascriptUrlData.py
Normal file
13
linkcheck/JavascriptUrlData.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from UrlData import UrlData
|
||||
|
||||
class JavascriptUrlData(UrlData):
    """Url link with javascript scheme; not checked, only logged."""

    def check(self, config):
        # javascript: pseudo-URLs cannot be verified; warn and log
        self.setWarning("Javascript url ignored")
        self.logMe(config)

    def __str__(self):
        return "Javascript link\n"+UrlData.__str__(self)
|
||||
|
||||
|
||||
360
linkcheck/Logging.py
Normal file
360
linkcheck/Logging.py
Normal file
|
|
@ -0,0 +1,360 @@
|
|||
import sys,time,Config,StringUtil
|
||||
|
||||
# ANSI color codes used by ColoredLogger (terminal escape sequences)
ESC="\x1b"
COL_PARENT =ESC+"[37m" # white
COL_URL =ESC+"[35m" # magenta
COL_REAL =ESC+"[35m" # magenta
COL_BASE =ESC+"[36m" # cyan
COL_VALID =ESC+"[1;32m" # green
COL_INVALID =ESC+"[1;31m" # red
COL_INFO =ESC+"[0;37m" # standard
COL_WARNING =ESC+"[1;33m" # yellow
COL_DLTIME =ESC+"[0;37m" # standard
COL_RESET =ESC+"[0m" # reset to standard

# HTML colors and table fragments used by HtmlLogger.
# Values already include surrounding quotes/markup so they can be
# concatenated directly into attribute positions.
ColorBackground="\"#fff7e5\""
ColorUrl="\"#dcd5cf\""
ColorBorder="\"#000000\""
ColorLink="\"#191c83\""
TableWarning="<td bgcolor=\"#e0954e\">"
# NOTE(review): TableError and TableOK lack the leading '#' in their
# hex colors (cf. TableWarning) -- likely a typo, verify before fixing.
TableError="<td bgcolor=\"db4930\">"
TableOK="<td bgcolor=\"3ba557\">"
RowEnd="</td></tr>\n"
MyFont="<font face=\"Lucida,Verdana,Arial,sans-serif,Helvetica\">"
||||
|
||||
# return current time as "dd.mm.yyyy HH:MM:SS"
def _currentTime():
    """Return the local time formatted for log headers/footers."""
    now = time.localtime(time.time())
    return time.strftime("%d.%m.%Y %H:%M:%S", now)
|
||||
|
||||
class StandardLogger:
    """Standard text logger.

    Informal text output format spec: output is a set of URL logs
    separated by one or more blank lines.  Each URL log is two or
    more lines of "keyword data" pairs.  Keywords: Real URL
    (necessary), Result (necessary), Base, Parent URL, Info,
    Warning, D/L Time.  Unknown keywords will be ignored.
    """

    def __init__(self, fd=sys.stdout):
        self.errors = 0
        self.warnings = 0
        self.fd = fd
        # never close stdout on behalf of the caller
        self.willclose = fd != sys.stdout

    def init(self):
        """Write the log header."""
        self.fd.write(Config.AppName+"\n"+\
                      Config.Freeware+"\n"+\
                      "Get the newest version at "+Config.Url+"\n"+\
                      "Write comments and bugs to "+Config.Email+"\n\n"+\
                      "Start checking at "+_currentTime()+"\n")
        self.fd.flush()

    def newUrl(self, urldata):
        """Write one URL log entry and update error/warning counts."""
        write = self.fd.write
        write("\nURL "+urldata.urlName)
        if urldata.cached:
            write(" (cached)\n")
        else:
            write("\n")
        if urldata.parentName:
            write("Parent URL "+urldata.parentName+", line "+str(urldata.line)+"\n")
        if urldata.baseRef:
            write("Base "+urldata.baseRef+"\n")
        if urldata.url:
            write("Real URL "+urldata.url+"\n")
        if urldata.time:
            write("D/L Time %.3f seconds\n" % urldata.time)
        if urldata.infoString:
            write("Info "+StringUtil.indent(\
                StringUtil.blocktext(urldata.infoString, 65), 11)+"\n")
        if urldata.warningString:
            self.warnings = self.warnings+1
            write("Warning "+urldata.warningString+"\n")

        write("Result ")
        if urldata.valid:
            write(urldata.validString+"\n")
        else:
            self.errors = self.errors+1
            write(urldata.errorString+"\n")
        self.fd.flush()

    def endOfOutput(self):
        """Write the summary footer and close the stream if owned."""
        self.fd.write("\nThats it. ")
        if self.warnings==1:
            self.fd.write("1 warning, ")
        else:
            self.fd.write(str(self.warnings)+" warnings, ")
        if self.errors==1:
            self.fd.write("1 error")
        else:
            self.fd.write(str(self.errors)+" errors")
        self.fd.write(" found.\n")
        self.fd.write("Stopped checking at "+_currentTime()+"\n")
        self.fd.flush()
        self.close()

    def close(self):
        if self.willclose:
            self.fd.close()
|
||||
|
||||
|
||||
class HtmlLogger(StandardLogger):
    """Logger with HTML output.

    Emits one bordered table per checked URL; warnings and results
    are color-coded via the module-level Table* fragments.
    """

    def init(self):
        """Write the HTML page header."""
        self.fd.write("<html><head><title>"+Config.AppName+"</title></head>"+\
                      "<body bgcolor="+ColorBackground+" link="+ColorLink+\
                      " vlink="+ColorLink+" alink="+ColorLink+">"+\
                      "<center><h2>"+MyFont+Config.AppName+"</font>"+\
                      "</center></h2>"+\
                      "<br><blockquote>"+Config.Freeware+"<br><br>"+\
                      "Start checking at "+_currentTime()+"<br><br>")
        self.fd.flush()

    def newUrl(self, urlData):
        """Write one URL entry as a nested HTML table row set."""
        self.fd.write("<table align=left border=\"0\" cellspacing=\"0\""+\
                      " cellpadding=\"1\" bgcolor="+ColorBorder+">"+\
                      "<tr><td><table align=left border=\"0\" cellspacing=\"0\""+\
                      " cellpadding=\"3\" bgcolor="+ColorBackground+">"+\
                      "<tr><td bgcolor="+ColorUrl+">"+\
                      MyFont+"URL</font></td><td bgcolor="+ColorUrl+">"+MyFont+\
                      StringUtil.htmlify(urlData.urlName))
        if urlData.cached:
            self.fd.write("(cached)")
        self.fd.write("</font>"+RowEnd)

        if urlData.parentName:
            self.fd.write("<tr><td>"+MyFont+"Parent URL</font></td><td>"+\
                          MyFont+"<a href=\""+urlData.parentName+"\">"+\
                          urlData.parentName+"</a> line "+str(urlData.line)+\
                          "</font>"+RowEnd)
        if urlData.baseRef:
            self.fd.write("<tr><td>"+MyFont+"Base</font></td><td>"+MyFont+\
                          urlData.baseRef+"</font>"+RowEnd)
        if urlData.url:
            self.fd.write("<tr><td>"+MyFont+"Real URL</font></td><td>"+MyFont+\
                          "<a href=\""+StringUtil.htmlify(urlData.url)+"\">"+\
                          urlData.url+"</a></font>"+RowEnd)
        if urlData.time:
            self.fd.write("<tr><td>"+MyFont+"D/L Time</font></td><td>"+MyFont+\
                          ("%.3f" % urlData.time)+" seconds</font>"+RowEnd)
        if urlData.infoString:
            self.fd.write("<tr><td>"+MyFont+"Info</font></td><td>"+MyFont+\
                          StringUtil.htmlify(urlData.infoString)+"</font>"+RowEnd)
        if urlData.warningString:
            self.warnings = self.warnings+1
            self.fd.write("<tr>"+TableWarning+MyFont+"Warning</font></td>"+\
                          TableWarning+MyFont+urlData.warningString+\
                          "</font>"+RowEnd)
        if urlData.valid:
            self.fd.write("<tr>"+TableOK+MyFont+"Result</font></td>"+\
                          TableOK+MyFont+urlData.validString+"</font>"+RowEnd)
        else:
            self.errors = self.errors+1
            self.fd.write("<tr>"+TableError+MyFont+"Result</font></td>"+\
                          TableError+MyFont+urlData.errorString+"</font>"+RowEnd)

        self.fd.write("</table></td></tr></table><br clear=all><br>")
        self.fd.flush()

    def endOfOutput(self):
        """Write the summary and page footer, then close the stream."""
        self.fd.write(MyFont+"Thats it. ")
        if self.warnings==1:
            self.fd.write("1 warning, ")
        else:
            self.fd.write(str(self.warnings)+" warnings, ")
        if self.errors==1:
            self.fd.write("1 error")
        else:
            self.fd.write(str(self.errors)+" errors")
        self.fd.write(" found.<br>")
        # BUGFIX: added the missing space after "at" (the text logger
        # writes "Stopped checking at "+time; this one ran them together).
        self.fd.write("Stopped checking at "+_currentTime()+\
                      "</font></blockquote><br><hr noshade size=1><small>"+\
                      MyFont+Config.HtmlAppInfo+"<br>Get the newest version at "+\
                      "<a href=\""+Config.Url+"\">"+Config.Url+"</a>.<br>"+\
                      "Write comments and bugs to <a href=\"mailto:"+\
                      Config.Email+"\">"+Config.Email+"</a>."+\
                      "</font></small></body></html>")
        self.fd.flush()
        self.close()
|
||||
|
||||
|
||||
class ColoredLogger(StandardLogger):
    """ANSI colorized output.

    Groups URLs under their parent page with a "|  +-" tree prefix
    and colors fields with the module-level COL_* escape codes.
    """

    def __init__(self, fd=sys.stdout):
        StandardLogger.__init__(self, fd)
        self.currentPage = None      # parent URL of the open group
        self.prefix = 0              # 1 while inside a tree group

    def newUrl(self, urlData):
        write = self.fd.write
        if urlData.parentName:
            if self.currentPage != urlData.parentName:
                # close the previous group before opening a new one
                if self.prefix:
                    write("o\n")
                write("\nParent URL "+COL_PARENT+urlData.parentName+\
                      COL_RESET+"\n")
                self.prefix = 1
                self.currentPage = urlData.parentName
        else:
            self.prefix = 0

        if self.prefix:
            write("|\n+- ")
        else:
            write("\n")
        write("URL "+COL_URL+urlData.urlName+COL_RESET)
        if urlData.line:
            write(" (line "+repr(urlData.line)+")")
        if urlData.cached:
            write("(cached)\n")
        else:
            write("\n")

        if urlData.baseRef:
            if self.prefix:
                write("| ")
            write("Base "+COL_BASE+urlData.baseRef+COL_RESET+"\n")

        if urlData.url:
            if self.prefix:
                write("| ")
            write("Real URL "+COL_REAL+urlData.url+COL_RESET+"\n")
        if urlData.time:
            if self.prefix:
                write("| ")
            write("D/L Time "+COL_DLTIME+("%.3f" % urlData.time)+" seconds"+\
                  COL_RESET+"\n")

        if urlData.infoString:
            wrapped = StringUtil.blocktext(urlData.infoString, 65)
            if self.prefix:
                write("| Info "+StringUtil.indentWith(wrapped, "| "))
            else:
                write("Info "+StringUtil.indentWith(wrapped, " "))
            write(COL_RESET+"\n")

        if urlData.warningString:
            self.warnings = self.warnings+1
            if self.prefix:
                write("| ")
            write("Warning "+COL_WARNING+urlData.warningString+\
                  COL_RESET+"\n")

        if self.prefix:
            write("| ")
        write("Result ")
        if urlData.valid:
            write(COL_VALID+urlData.validString+COL_RESET+"\n")
        else:
            self.errors = self.errors+1
            write(COL_INVALID+urlData.errorString+COL_RESET+"\n")
        self.fd.flush()

    def endOfOutput(self):
        # terminate an open tree group, then defer to the text summary
        if self.prefix:
            self.fd.write("o\n")
        StandardLogger.endOfOutput(self)
|
||||
|
||||
|
||||
class GMLLogger(StandardLogger):
    """Graph Modelling Language (GML) sitemap output.

    Collects every checked URL during the run and emits a directed
    graph (nodes = unique URLs, edges = parent->child links) at the
    end of output.
    """

    def __init__(self,fd=sys.stdout):
        StandardLogger.__init__(self,fd)
        self.nodes = []     # urldata objects in check order

    def init(self):
        self.fd.write("graph [\n Creator \""+Config.AppName+\
                      "\"\n comment \"you get pylice at "+Config.Url+\
                      "\"\n comment \"write comments and bugs to "+Config.Email+\
                      "\"\n directed 1\n")
        self.fd.flush()

    def newUrl(self, urlData):
        # emission is deferred until endOfOutput
        self.nodes.append(urlData)

    def endOfOutput(self):
        writtenNodes = {}
        # write nodes: one per distinct resolved URL
        nodeid = 1
        for node in self.nodes:
            if node.url and not writtenNodes.has_key(node.url):
                self.fd.write(" node [\n id "+repr(nodeid)+"\n label \""+
                              node.url+"\"\n ]\n")
                writtenNodes[node.url] = nodeid
                nodeid = nodeid + 1
        # write edges
        # BUGFIX: the original indexed writtenNodes[node.parentName]
        # unconditionally, raising KeyError whenever a parent URL never
        # occurred as a node URL itself; such edges are now skipped.
        for node in self.nodes:
            if node.url and node.parentName and \
               writtenNodes.has_key(node.parentName):
                self.fd.write(" edge [\n label \""+node.urlName+\
                              "\"\n source "+repr(writtenNodes[node.parentName])+\
                              "\n target "+repr(writtenNodes[node.url])+\
                              "\n ]\n")
        # end of output
        self.fd.write("]\n")
        self.fd.flush()
        self.close()
|
||||
|
||||
|
||||
class SQLLogger(StandardLogger):
    """ SQL output, only tested with PostgreSQL.

    Emits one INSERT statement per checked URL into table pylicedb.
    """

    def init(self):
        self.fd.write("-- created by "+Config.AppName+" at "+_currentTime()+\
                      "\n-- you get pylice at "+Config.Url+\
                      "\n-- write comments and bugs to "+Config.Email+"\n\n")
        self.fd.flush()

    def newUrl(self, urlData):
        # BUGFIX: the original wrote "values " without the opening
        # parenthesis, producing syntactically invalid SQL
        # ("... values 'x',...);").
        self.fd.write("insert into pylicedb(urlname,"+\
                      "recursionlevel,"+\
                      "parentname,"+\
                      "baseref,"+\
                      "errorstring,"+\
                      "validstring,"+\
                      "warningstring,"+\
                      "infoString,"+\
                      "valid,"+\
                      "url,"+\
                      "line,"+\
                      "cached) values (")
        # BUGFIX: urlname is now escaped via StringUtil.sqlify like every
        # other string column, instead of being hand-quoted unescaped.
        self.fd.write(StringUtil.sqlify(urlData.urlName)+","+\
                      repr(urlData.recursionLevel)+","+\
                      StringUtil.sqlify(urlData.parentName)+","+\
                      StringUtil.sqlify(urlData.baseRef)+","+\
                      StringUtil.sqlify(urlData.errorString)+","+\
                      StringUtil.sqlify(urlData.validString)+","+\
                      StringUtil.sqlify(urlData.warningString)+","+\
                      StringUtil.sqlify(urlData.infoString)+","+\
                      repr(urlData.valid)+","+\
                      StringUtil.sqlify(urlData.url)+","+\
                      repr(urlData.line)+","+\
                      repr(urlData.cached)+");\n")
        self.fd.flush()

    def endOfOutput(self):
        # no footer for SQL output; just release the stream
        self.close()
|
||||
67
linkcheck/MailtoUrlData.py
Normal file
67
linkcheck/MailtoUrlData.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import re,socket,string,DNS,sys
|
||||
from HostCheckingUrlData import HostCheckingUrlData
|
||||
from smtplib import SMTP
|
||||
|
||||
class MailtoUrlData(HostCheckingUrlData):
    """Url link with mailto scheme.

    Splits the address into user and host, looks up the MX records
    for the host and tries to verify the user via SMTP VRFY.
    """

    def buildUrl(self):
        HostCheckingUrlData.buildUrl(self)
        if not re.compile("^mailto:([\-\w.]+@[\-\w.?=]+|[\w\s]+<[\-\w.]+@[\-\w.?=]+>)").match(self.urlName):
            raise Exception("Illegal mailto link syntax")
        self.host = self.urlName[7:]
        # "Name <user@host>" form: keep only the angle-bracketed part
        lt = string.find(self.host, "<")
        gt = string.find(self.host, ">")
        if lt!=-1 and gt!=-1 and lt<gt:
            self.host = self.host[lt+1:gt]
        at = string.find(self.host, "@")
        self.user = self.host[:at]
        self.host = self.host[(at+1):]
        # drop any "?subject=..." suffix
        q = string.find(self.host, "?")
        if q!=-1:
            self.host = self.host[:q]
        self.host = string.lower(self.host)
        # do not lower the user name

    def checkConnection(self, config):
        DNS.ParseResolvConf()
        mxrecords = DNS.mxlookup(self.host)
        if not len(mxrecords):
            self.setError("No mail host for "+self.host+" found")
            return
        smtpconnect = 0
        # try each MX host until one accepts an SMTP connection;
        # NOTE: mxrecord deliberately leaks out of the loop below
        for mxrecord in mxrecords:
            try:
                self.urlConnection = SMTP(mxrecord[1])
                smtpconnect = 1
                self.urlConnection.helo()
                info = self.urlConnection.verify(self.user)
                if info[0]==250:
                    self.setInfo("Verified adress: "+info[1])
            except:
                # best effort: ignore this host and move on
                type, value = sys.exc_info()[:2]
            if smtpconnect: break

        if not smtpconnect:
            self.setWarning("None of the mail hosts for "+self.host+" accepts an SMTP connection")
            mxrecord = mxrecords[0][1]
        else:
            mxrecord = mxrecord[1]
        self.setValid("found mail host "+mxrecord)

    def closeConnection(self):
        try:
            self.urlConnection.quit()
        except:
            pass
        self.urlConnection = None

    def getCacheKey(self):
        # cache per user@host, not just per host
        return "mailto:"+self.user+"@"+HostCheckingUrlData.getCacheKey(self)

    def __str__(self):
        return "Mailto link\n"+HostCheckingUrlData.__str__(self)
|
||||
|
||||
|
||||
76
linkcheck/OutputReader.py
Normal file
76
linkcheck/OutputReader.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
import string,re
|
||||
import UrlData
|
||||
|
||||
class ParseException(Exception):
    """Raised when a text log cannot be parsed back into URL data."""
|
||||
|
||||
class OutputReader:
    """Parse StandardLogger text output back into UrlData objects.

    Records are separated by blank lines; each record needs at least
    a "Real URL" and a "Result" line (tracked via self.state).
    """

    ws = re.compile("\s+")
    regex_realUrl = re.compile("^Real URL.+")
    regex_result = re.compile("^Result.+")
    regex_base = re.compile("^Base.+")
    regex_info = re.compile("^Info.+")
    regex_warning = re.compile("^Warning.+")
    regex_parentUrl = re.compile("^Parent URL.+")
    regex_valid = re.compile("^Valid.*")

    def resetState(self):
        """Clear all per-record fields between records."""
        self.urlName = None
        self.parentName = None
        self.baseRef = None
        self.info = None
        self.warning = None
        self.result = None
        self.linenumber = 0
        # state counts the mandatory keywords seen (Real URL, Result)
        self.state = 0

    def parse(self, file):
        """Read *file* line by line and return the list of UrlData objects."""
        line = file.readline()
        url = None
        urls = []
        self.resetState()

        while line:
            if OutputReader.ws.match(line):
                # blank line ends a record
                if self.state>=2:
                    #append url
                    urldata = UrlData.GetUrlDataFrom(self.urlName, 0,
                        self.parentName, self.baseRef, self.linenumber)
                    if self.info:
                        urldata.setInfo(self.info)
                    if self.warning:
                        # BUGFIX: original passed self.info here, losing
                        # the parsed warning text
                        urldata.setWarning(self.warning)
                    if OutputReader.regex_valid.match(self.result):
                        urldata.valid=1
                        urldata.validString = self.result
                    else:
                        urldata.valid=0
                        urldata.errorString = self.result
                    urls.append(urldata)
                elif self.state:
                    raise ParseException("No Real URL and Result keyword found")
                self.resetState()

            elif OutputReader.regex_realUrl.match(line):
                self.state = self.state+1
                self.urlName = string.strip(line[8:])
            elif OutputReader.regex_result.match(line):
                self.state = self.state+1
                self.result = string.strip(line[6:])
            elif OutputReader.regex_info.match(line):
                self.info = string.strip(line[4:])
            elif OutputReader.regex_base.match(line):
                self.baseRef = string.strip(line[4:])
            elif OutputReader.regex_warning.match(line):
                self.warning = string.strip(line[7:])
            elif OutputReader.regex_parentUrl.match(line):
                self.parentName = string.strip(line[10:])
                if ',' in self.parentName:
                    self.parentName,self.linenumber = string.split(self.parentName,",",1)
            else:
                # unknown keywords are ignored per the format spec
                pass

            line = file.readline()
        return urls
|
||||
|
||||
156
linkcheck/RobotsTxt.py
Normal file
156
linkcheck/RobotsTxt.py
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
import re,urlparse,string,httplib,urllib,sys,StringUtil,Config
|
||||
|
||||
class RobotsTxt:
|
||||
def __init__(self, base, useragent):
|
||||
self.entries = []
|
||||
self.disallowAll = 0
|
||||
self.allowAll = 0
|
||||
self.base = base
|
||||
|
||||
try:
|
||||
urlConnection = httplib.HTTP(base)
|
||||
urlConnection.putrequest("GET", "/robots.txt")
|
||||
urlConnection.putheader("User-agent", useragent)
|
||||
urlConnection.endheaders()
|
||||
status = urlConnection.getreply()[0]
|
||||
if status==401 or status==403:
|
||||
self.disallowAll = 1
|
||||
else:
|
||||
if status>=400:
|
||||
self.allowAll = 1
|
||||
|
||||
if status<400:
|
||||
self.parseUrl(urlConnection)
|
||||
except:
|
||||
type, value = sys.exc_info()[:2]
|
||||
Config.debug("Hoppla. "+str(value))
|
||||
self.allowAll = 1
|
||||
|
||||
def parseUrl(self, urlConnection):
|
||||
data = urlConnection.getfile().readlines()
|
||||
state = 0
|
||||
linenumber = 0
|
||||
entry = Entry()
|
||||
|
||||
for line in data:
|
||||
line = string.lower(string.strip(line))
|
||||
linenumber = linenumber + 1
|
||||
|
||||
if len(line)<=0:
|
||||
if state==1:
|
||||
raise ParseException, \
|
||||
"robots.txt:"+`linenumber`+": no rules found"
|
||||
elif state==2:
|
||||
self.entries.append(entry)
|
||||
entry = Entry()
|
||||
state = 0
|
||||
line = string.strip(StringUtil.stripFenceComments(line))
|
||||
if len(line)<=0:
|
||||
continue
|
||||
|
||||
if re.compile("^user-agent:.+").match(line):
|
||||
if state==2:
|
||||
raise ParseException, \
|
||||
"robots.txt:"+`linenumber`+": user-agent in the middle of rules"
|
||||
entry.useragents.append(string.strip(line[11:]))
|
||||
state = 1
|
||||
|
||||
elif re.compile("^disallow:.+").match(line):
|
||||
if state==0:
|
||||
raise ParseException, \
|
||||
"robots.txt:"+`linenumber`+": disallow without user agents"
|
||||
line = string.strip(line[9:])
|
||||
entry.rulelines.append(RuleLine(line, 0))
|
||||
state = 2
|
||||
|
||||
elif re.compile("^allow:.+").match(line):
|
||||
if state==0:
|
||||
raise ParseException, \
|
||||
"robots.txt:"+`linenumber`+": allow without user agents"
|
||||
line = string.strip(line[6:])
|
||||
entry.rulelines.append(RuleLine(line, 1))
|
||||
|
||||
else:
|
||||
# ignore extensions
|
||||
pass
|
||||
|
||||
|
||||
def allowance(self, useragent, path):
|
||||
Config.debug("DEBUG: checking allowance\n")
|
||||
if self.disallowAll:
|
||||
return 0
|
||||
if self.allowAll:
|
||||
return 1
|
||||
|
||||
# search for given user agent matches
|
||||
# the first match counts
|
||||
useragent = string.lower(useragent)
|
||||
for entry in self.entries:
|
||||
if entry.appliesToAgent(useragent):
|
||||
return entry.allowance(path)
|
||||
# agent not found ==> access granted
|
||||
Config.debug("DEBUG: no match, access granted\n")
|
||||
return 1
|
||||
|
||||
def __str__(self):
|
||||
ret = "RobotsTxt\n"+\
|
||||
"Base: "+self.base+"\n"+\
|
||||
"AllowAll: "+`self.allowAll`+"\n"+\
|
||||
"DisallowAll: "+`self.disallowAll`+"\n"
|
||||
for entry in self.entries:
|
||||
ret = ret + str(entry) + "\n"
|
||||
return ret
|
||||
|
||||
|
||||
|
||||
class RuleLine:
    """A single Allow/Disallow rule from robots.txt."""

    def __init__(self, path, allowance):
        # paths are stored URL-decoded; "*" matches everything
        self.path = urllib.unquote(path)
        self.allowance = allowance

    def appliesTo(self, filename):
        "Return true if this rule's path prefix matches *filename*."
        if self.path=="*":
            return 1
        return re.compile(self.path).match(filename)

    def __str__(self):
        if self.allowance:
            return "Allow: "+self.path
        return "Disallow: "+self.path
|
||||
|
||||
|
||||
|
||||
class Entry:
    """One robots.txt record: a list of user-agents and their rules."""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        text = ""
        for agent in self.useragents:
            text = text + "User-agent: "+agent+"\n"
        for rule in self.rulelines:
            text = text + str(rule) + "\n"
        return text

    def appliesToAgent(self, agent):
        "check if this entry applies to the specified agent"
        for cur_agent in self.useragents:
            # "*" is a wildcard; otherwise prefix-match the agent name
            if cur_agent=="*":
                return 1
            if re.compile("^"+cur_agent).match(agent):
                return 1
        return 0

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - file is URL decoded"""
        # first matching rule wins; no rule means allowed
        for rule in self.rulelines:
            if rule.appliesTo(filename):
                return rule.allowance
        return 1
|
||||
|
||||
26
linkcheck/TelnetUrlData.py
Normal file
26
linkcheck/TelnetUrlData.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import telnetlib,re,string
|
||||
from HostCheckingUrlData import HostCheckingUrlData
|
||||
|
||||
class TelnetUrlData(HostCheckingUrlData):
    """Url link with telnet scheme.

    Resolves the host (via the base class) and then opens a Telnet
    connection on port 23.
    """

    def buildUrl(self):
        HostCheckingUrlData.buildUrl(self)
        if not re.compile("^telnet:[\w.\-]+").match(self.urlName):
            raise Exception("Illegal telnet link syntax")
        # BUGFIX: this module never imported the string module, so
        # string.lower raised a NameError; the import is now added at
        # the top of the file.
        self.host = string.lower(self.urlName[7:])

    def checkConnection(self, config):
        # base class resolves the host name first
        HostCheckingUrlData.checkConnection(self, config)
        self.urlConnection = telnetlib.Telnet()
        self.urlConnection.open(self.host, 23)

    def getCacheKey(self):
        return "telnet:"+HostCheckingUrlData.getCacheKey(self)

    def __str__(self):
        return "Telnet link\n"+HostCheckingUrlData.__str__(self)
|
||||
|
||||
35
linkcheck/Threader.py
Normal file
35
linkcheck/Threader.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
from threading import *
|
||||
|
||||
class Threader:
    "A thread generating class"

    def __init__(self, num=5):
        """Create a limiter that allows at most num live threads."""
        self.maxThreads = num
        # Thread objects we have started and not yet reaped
        self.threads = []

    def acquire(self):
        "Wait until we are allowed to start a new thread"
        # NOTE: busy-waits at full speed while the pool is saturated;
        # kept as-is to avoid changing the module's (minimal) semantics
        while 1:
            self.reduceThreads()
            if len(self.threads) < self.maxThreads:
                break

    def reduceThreads(self):
        """Drop all finished threads from self.threads."""
        # BUGFIX: iterate over a copy -- removing elements from a list
        # while iterating over it skips entries, so dead threads could
        # linger and finished() might never become true.
        for t in self.threads[:]:
            # is_alive(): the isAlive() alias was removed in Python 3.9
            if not t.is_alive():
                self.threads.remove(t)

    def finished(self):
        """Return true when no started thread is still tracked."""
        return not len(self.threads)

    def finish(self):
        """Reap finished threads; running ones cannot be stopped."""
        self.reduceThreads()
        for t in self.threads:
            pass # dont know how to stop a thread

    def startThread(self, callable, args):
        "Generate a new thread"
        # block until the pool has a free slot, then start and track it
        self.acquire()
        t = Thread(None, callable, None, args)
        t.start()
        self.threads.append(t)
|
||||
294
linkcheck/UrlData.py
Normal file
294
linkcheck/UrlData.py
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
import sys,re,string,urlparse,urllib,time
|
||||
import Config,StringUtil
|
||||
|
||||
# (tag, attribute) pairs whose attribute values are treated as links
# when an HTML page is parsed recursively (see UrlData.parseUrl)
LinkTags = [("a", "href"),
            ("img", "src"),
            ("form", "action"),
            ("body", "background"),
            ("frame", "src"),
            ("link", "href"),
            ("meta", "url"), # <meta http-equiv="refresh" content="5; url=...">
            ("area", "href")]
|
||||
|
||||
class UrlData:
    "Representing a URL with additional information like validity etc"

    def __init__(self,
                 urlName,
                 recursionLevel,
                 parentName = None,
                 baseRef = None,
                 line = 0, _time = 0):
        """Store the raw link data; no checking happens here.

        urlName        -- the URL string exactly as found in the document
        recursionLevel -- how many parse levels deep this link was found
        parentName     -- URL of the document the link was found in
        baseRef        -- value of an enclosing <base href=...>, if any
        line           -- line number of the link in the parent document
        _time          -- initial value for the download time
        """
        self.urlName = urlName
        self.recursionLevel = recursionLevel
        self.parentName = parentName
        self.baseRef = baseRef
        self.errorString = "Error"
        self.validString = "Valid"
        self.warningString = None
        self.infoString = None
        # 1 while the URL counts as valid; setError() resets it to 0
        self.valid = 1
        # absolute URL, computed by buildUrl()
        self.url = None
        self.line = line
        # download time, measured by getContent()
        self.time = _time
        # 1 once the result was stored in or copied from the URL cache
        self.cached = 0
        # open connection object, created by checkConnection()
        self.urlConnection = None

    def setError(self, s):
        """Mark this URL invalid and remember the error text."""
        self.valid=0
        self.errorString = "Error: " + s

    def setValid(self, s):
        """Mark this URL valid and remember the status text."""
        self.valid=1
        self.validString = "Valid: " + s

    def isHtml(self):
        # base class default: not HTML; subclasses override this.
        # NOTE(review): an identical second definition of isHtml() further
        # down shadows this one -- confirm one of them can be removed
        return 0

    def setWarning(self, s):
        """Append s to the accumulated warning text."""
        if self.warningString:
            self.warningString = self.warningString+"\n" + s
        else:
            self.warningString = s

    def setInfo(self, s):
        """Append s to the accumulated info text."""
        if self.infoString:
            self.infoString = self.infoString+"\n"+s
        else:
            self.infoString = s

    def copyFrom(self, urlData):
        """Take over the check result of a (cached) UrlData object."""
        self.errorString = urlData.errorString
        self.validString = urlData.validString
        self.warningString = urlData.warningString
        self.infoString = urlData.infoString
        self.valid = urlData.valid
        self.time = urlData.time

    def buildUrl(self):
        """Resolve the raw URL against base/parent and lowercase the host.

        Sets self.url and self.urlTuple; may raise on unparsable input.
        """
        if self.baseRef:
            self.url = urlparse.urljoin(self.baseRef, self.urlName)
        elif self.parentName:
            self.url = urlparse.urljoin(self.parentName, self.urlName)
        else:
            self.url = self.urlName
        self.urlTuple = urlparse.urlparse(self.url)
        # make host lowercase
        self.urlTuple = (self.urlTuple[0],string.lower(self.urlTuple[1]),
                         self.urlTuple[2],self.urlTuple[3],self.urlTuple[4],
                         self.urlTuple[5])
        self.url = urlparse.urlunparse(self.urlTuple)

    def logMe(self, config):
        """Log this URL if it is invalid, or logging is verbose, or it
        carries warnings and warnings are enabled."""
        if config["verbose"] or not self.valid or \
           (self.warningString and config["warnings"]):
            config.log_newUrl(self)

    def check(self, config):
        """Run the full check pipeline for this URL:
        syntax -> cache lookup -> domain filter -> connection (+ anchors)
        -> optional recursive parse; logs and caches the result."""
        Config.debug(Config.DebugDelim+"Checking\n"+str(self)+"\n"+\
                     Config.DebugDelim)
        # check syntax
        Config.debug("DEBUG: checking syntax\n")
        if not self.urlName or self.urlName=="":
            self.setError("URL is null or empty")
            self.logMe(config)
            return
        # NOTE: the bare except turns any failure inside buildUrl()
        # (including programming errors) into a check error on this URL
        try: self.buildUrl()
        except:
            type, value = sys.exc_info()[:2]
            self.setError(str(value))
            self.logMe(config)
            return

        # check the cache
        Config.debug("DEBUG: checking cache\n")
        if config.urlCache_has_key(self.getCacheKey()):
            self.copyFrom(config.urlCache_get(self.getCacheKey()))
            self.cached = 1
            self.logMe(config)
            return

        # apply filter
        Config.debug("DEBUG: checking filter\n")
        if config["strict"] and self.isExtern(config):
            self.setWarning("outside of domain filter, checked only syntax")
            self.logMe(config)
            return

        # check connection
        Config.debug("DEBUG: checking connection\n")
        try:
            self.checkConnection(config)
            if self.urlTuple and config["anchors"]:
                # urlTuple[5] is the fragment part of the parsed URL
                self.checkAnchors(self.urlTuple[5])
        except:
            type, value = sys.exc_info()[:2]
            self.setError(str(value))

        # check recursion
        Config.debug("DEBUG: checking recursion\n")
        if self.allowsRecursion(config):
            self.parseUrl(config)
        self.closeConnection()
        self.logMe(config)
        self.putInCache(config)

    def closeConnection(self):
        """Close the connection (if any) and drop the reference."""
        # brute force closing
        try: self.urlConnection.close()
        except: pass
        # release variable for garbage collection
        self.urlConnection = None

    def putInCache(self, config):
        """Store this (checked) object in the URL cache, once."""
        cacheKey = self.getCacheKey()
        if cacheKey and not self.cached:
            config.urlCache_set(cacheKey, self)
            self.cached = 1

    def getCacheKey(self):
        """Cache key: the normalized URL, or None before buildUrl() ran."""
        if self.urlTuple:
            return urlparse.urlunparse(self.urlTuple)
        return None

    def checkConnection(self, config):
        """Default connection check: just open the URL with urllib."""
        self.urlConnection = urllib.urlopen(self.url)

    def allowsRecursion(self, config):
        """True if this document should be parsed for further links."""
        return self.valid and \
               self.isHtml() and \
               not self.cached and \
               self.recursionLevel < config["recursionlevel"] and \
               not self.isExtern(config)

    def isHtml(self):
        # duplicate of the earlier definition; this later one wins
        return 0

    def checkAnchors(self, anchor):
        """Warn when the given fragment has no matching <a name=...>
        anchor in the document content."""
        if not (anchor!="" and self.isHtml() and self.valid):
            return
        for cur_anchor,line in self.searchInForTag(self.getContent(), ("a", "name")):
            if cur_anchor == anchor:
                return
        self.setWarning("anchor #"+anchor+" not found")

    def isExtern(self, config):
        """Classify this URL as extern (1) or intern (0) using the
        configured pattern lists; allowdeny swaps the checking order."""
        if len(config["externlinks"])==0 and len(config["internlinks"])==0:
            return 0
        # deny and allow external checking
        Config.debug(self.url)
        if config["allowdeny"]:
            for pat in config["internlinks"]:
                if pat.search(self.url):
                    return 0
            for pat in config["externlinks"]:
                if pat.search(self.url):
                    return 1
        else:
            for pat in config["externlinks"]:
                if pat.search(self.url):
                    return 1
            for pat in config["internlinks"]:
                if pat.search(self.url):
                    return 0
        return 1

    def getContent(self):
        """Precondition: urlConnection is an opened URL.
        Return the content with HTML comments stripped; records the
        download time in self.time."""
        t = time.time()
        data = StringUtil.stripHtmlComments(self.urlConnection.read())
        self.time = time.time() - t
        return data

    def parseUrl(self, config):
        """Parse the document content and queue every link found in it."""
        Config.debug(Config.DebugDelim+"Parsing recursively into\n"+\
                     str(self)+"\n"+Config.DebugDelim)
        data = self.getContent()

        # search for a possible base reference
        bases = self.searchInForTag(data, ("base", "href"))
        baseRef = None
        if len(bases)>=1:
            baseRef = bases[0][0]
            if len(bases)>1:
                self.setWarning("more than one base tag found")

        # search for tags and add found tags to URL queue
        for tag in LinkTags:
            urls = self.searchInForTag(data, tag)
            Config.debug("DEBUG: "+str(tag)+" urls="+str(urls)+"\n")
            for _url,line in urls:
                config.appendUrl(GetUrlDataFrom(_url,
                                 self.recursionLevel+1, self.url, baseRef, line))

    def searchInForTag(self, data, tag):
        """Return a list of (value, linenumber) pairs for every occurrence
        of attribute tag[1] inside tag tag[0] within data.  Each match is
        cut out of the working copy of data so it is not found twice."""
        _urls = []
        _prefix="<\s*"+tag[0]+"\s+[^>]*?"+tag[1]+"\s*=\s*"
        _suffix="[^>]*>"
        # first pattern: quoted attribute value; second: unquoted value
        _patterns = [re.compile(_prefix+"\"([^\"]+)\""+_suffix, re.I),
                     re.compile(_prefix+"([^\s>]+)" +_suffix, re.I)]
        # lines removed from data so far; added back onto line numbers
        cutofflines = 0
        for _pattern in _patterns:
            while 1:
                _match = _pattern.search(data)
                if not _match: break
                # need to strip optional ending quotes for the <meta url=> tag
                linenumberbegin = StringUtil.getLineNumber(data, _match.start(0))
                linenumberend = StringUtil.getLineNumber(data, _match.end(0))
                cutofflines = cutofflines + linenumberend - linenumberbegin
                _urls.append((string.strip(StringUtil.rstripQuotes(_match.group(1))),
                              linenumberbegin + cutofflines))
                data = data[:_match.start(0)] + data[_match.end(0):]

        return _urls

    def __str__(self):
        # backquotes are the Python 1.x/2.x repr() syntax
        return "urlname="+`self.urlName`+"\nparentName="+`self.parentName`+\
               "\nbaseRef="+`self.baseRef`+"\ncached="+`self.cached`+\
               "\nrecursionLevel="+`self.recursionLevel`+\
               "\nurlConnection="+str(self.urlConnection)
|
||||
|
||||
from FileUrlData import FileUrlData
|
||||
from FtpUrlData import FtpUrlData
|
||||
from GopherUrlData import GopherUrlData
|
||||
from HttpUrlData import HttpUrlData
|
||||
from HttpsUrlData import HttpsUrlData
|
||||
from JavascriptUrlData import JavascriptUrlData
|
||||
from MailtoUrlData import MailtoUrlData
|
||||
from TelnetUrlData import TelnetUrlData
|
||||
|
||||
def GetUrlDataFrom(urlName,
                   recursionLevel,
                   parentName = None,
                   baseRef = None, line = 0, _time = 0):
    """Factory: pick the UrlData subclass matching the URL scheme.

    The scheme is taken from the first of urlName, baseRef, parentName
    that contains a ":"; unknown or missing schemes fall back to a
    local file URL.
    """
    # search for the absolute url
    name = ""
    if urlName and ":" in urlName:
        name = string.lower(urlName)
    elif baseRef and ":" in baseRef:
        name = string.lower(baseRef)
    elif parentName and ":" in parentName:
        name = string.lower(parentName)
    # test scheme; the prefixes are mutually exclusive ("http:" does not
    # prefix-match "https:..."), so table order does not affect results
    dispatch = [("http:", HttpUrlData),
                ("ftp:", FtpUrlData),
                ("file:", FileUrlData),
                ("telnet:", TelnetUrlData),
                ("mailto:", MailtoUrlData),
                ("gopher:", GopherUrlData),
                ("javascript:", JavascriptUrlData),
                ("https:", HttpsUrlData)]
    for prefix, klass in dispatch:
        if name[:len(prefix)] == prefix:
            return klass(urlName, recursionLevel, parentName, baseRef, line, _time)
    # assume local file
    return FileUrlData(urlName, recursionLevel, parentName, baseRef, line, _time)
|
||||
|
||||
15
linkcheck/__init__.py
Normal file
15
linkcheck/__init__.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# __init__.py for this module
|
||||
|
||||
import Config,UrlData,OutputReader,sys
|
||||
|
||||
def checkUrls(config=None):
    """Check all queued URLs of the given configuration until done.

    config defaults to a fresh Config.Configuration().
    BUGFIX: the old signature used "config = Config.Configuration()",
    which evaluates the default once at import time, so every caller
    without an argument silently shared (and mutated) one instance.
    """
    if config is None:
        config = Config.Configuration()
    config.log_init()
    try:
        while not config.finished():
            if config.hasMoreUrls():
                config.checkUrl(config.getUrl())
    except KeyboardInterrupt:
        # user abort: shut the checker down and still close the log
        config.finish()
        config.log_endOfOutput()
        sys.exit(1) # this is not good(tm)
    config.log_endOfOutput()
|
||||
247
linkchecker
Executable file
247
linkchecker
Executable file
|
|
@ -0,0 +1,247 @@
|
|||
#!/usr/bin/env python

import getopt,sys,re,string

# refuse to run on interpreters older than 1.5.2
# NOTE(review): lexicographic string comparison -- works for the 1.5.x
# series but would misorder versions like "1.10"; confirm before reuse
if sys.version[:5] < "1.5.2":
    print "This program requires Python 1.5.2 or later."
    sys.exit(1)

# add the path to linkcheck module
sys.path.insert(0, "/usr/share/linkchecker")

import linkcheck
|
||||
|
||||
Usage = """USAGE\tpylice [options] file_or_url...
|
||||
|
||||
OPTIONS
|
||||
-a, --anchors
|
||||
Check anchor references. Default is don't check anchors.
|
||||
-D, --debug
|
||||
Print additional debugging information.
|
||||
-e regex, --extern=regex
|
||||
Assume urls that match the given expression as extern.
|
||||
Only intern HTTP links are checked recursively.
|
||||
-f file, --config=file
|
||||
Use file as configuration file. Pylice first searches ~/.pylicerc
|
||||
and then /etc/pylicerc (under Windows <path-to-program>\\pylicerc).
|
||||
-i regex, --intern=regex
|
||||
Assume urls that match the given expression as intern.
|
||||
-h, --help
|
||||
Help me! Print usage information for this program.
|
||||
-l, --allowdeny
|
||||
Swap checking order to intern/extern. Default checking order
|
||||
is extern/intern.
|
||||
-o name, --output=name
|
||||
Specify output as """+linkcheck.Config.LoggerKeys+""".
|
||||
Default is text.
|
||||
-W name, --file-output=name
|
||||
Same as output, but write to a file pylice-out.<name>.
|
||||
If the file already exists, it is overwritten.
|
||||
You can specify this option more than once.
|
||||
Default is no file output.
|
||||
-p pwd, --password=pwd
|
||||
Try given password for HTML and FTP authorization.
|
||||
Default is 'joe@'. See -u.
|
||||
-P host[:port], --proxy=host[:port]
|
||||
Use specified proxy for HTTP requests.
|
||||
Standard port is 8080. Default is to use no proxy.
|
||||
-q, --quiet
|
||||
Quiet operation. This is only useful with -W.
|
||||
-r depth, --recursion-level=depth
|
||||
Check recursively all links up to given depth (depth >= 0).
|
||||
Default depth is 1.
|
||||
-R, --robots-txt
|
||||
Obey the robots exclusion standard.
|
||||
-s, --strict
|
||||
Check only syntax of extern links, do not try to connect to them.
|
||||
-t num, --threads=num
|
||||
Generate no more than num threads. Default number of threads is 5.
|
||||
To disable threading specify a non-positive number.
|
||||
-u name, --user=name
|
||||
Try given username for HTML and FTP authorization.
|
||||
Default is 'anonymous'. See -p.
|
||||
-V, --version
|
||||
Print version and exit.
|
||||
-v, --verbose
|
||||
Log all checked URLs (implies -w). Default is to log only invalid
|
||||
URLs.
|
||||
-w, --warnings
|
||||
Log warnings.
|
||||
"""
|
||||
|
||||
Notes = """NOTES
|
||||
o Pylice assumes an http:// resp. ftp:// link when a commandline URL
|
||||
starts with "www." resp. "ftp.".
|
||||
You can also give local files as arguments.
|
||||
o If you have your system configured to automatically establish a
|
||||
connection to the internet (e.g. with diald), it will connect when
|
||||
checking links not pointing to your local host.
|
||||
Use the -s and -i options to prevent this (see EXAMPLES).
|
||||
o Javascript and https links are currently ignored
|
||||
o If your platform does not support threading, pylice assumes -t0
|
||||
"""
|
||||
|
||||
Examples = """EXAMPLES
|
||||
o pylice -v -o html -r2 -s -i treasure.calvinsplayground.de \\
|
||||
http://treasure.calvinsplayground.de/~calvin/ > sample.html
|
||||
generates the included sample.html file
|
||||
o Local files and syntactic sugar on the command line:
|
||||
pylice c:\\temp\\test.html
|
||||
pylice ../bla.html
|
||||
pylice www.myhomepage.de
|
||||
pylice -r0 ftp.linux.org
|
||||
"""
|
||||
|
||||
def printVersion():
|
||||
print Config.AppInfo
|
||||
sys.exit(0)
|
||||
|
||||
def printHelp():
|
||||
print Usage
|
||||
print Notes
|
||||
print Examples
|
||||
sys.exit(0)
|
||||
|
||||
def printUsage(msg):
    """Write an error message plus a help hint to stderr, exit with 1."""
    sys.stderr.write("Error: %s\nType pylice -h for help\n" % (msg,))
    sys.exit(1)
|
||||
|
||||
|
||||
# Read command line arguments
try:
    # Note: cut out the name of the script
    options, args = getopt.getopt(sys.argv[1:], "aDe:f:hi:lP:o:p:qr:Rst:u:VvwW:",
                                  ["anchors",
                                   "config=",
                                   "debug",
                                   "extern=",
                                   "file-output=",
                                   "help",
                                   "intern=",
                                   "allowdeny",
                                   "output=",
                                   "proxy=",
                                   "password=",
                                   "quiet",
                                   "recursion-level=",
                                   "robots-txt",
                                   "strict",
                                   "threads=",
                                   "user=",
                                   "version",
                                   "verbose",
                                   "warnings"])
except getopt.error:
    # NOTE: assigning to "type" shadows the builtin of the same name
    type, value = sys.exc_info()[:2]
    printUsage(value)

# apply configuration
# -f/--config is handled in a first pass so the configuration file is
# read before any other command line option is applied
config = linkcheck.Config.Configuration()
try:
    configfile = []
    for opt,arg in options:
        if opt=="-f" or opt=="--config":
            configfile.append(arg)
    config.read(configfile)
except:
    type, value = sys.exc_info()[:2]
    printUsage(value)


# apply options and arguments
# constructAuth counts how many of -u/-p were given; warn unless both
constructAuth = 0
for opt,arg in options:
    if opt=="-a" or opt=="--anchors":
        config["anchors"] = 1

    elif opt=="-D" or opt=="--debug":
        linkcheck.Config.DebugFlag = 1

    elif opt=="-e" or opt=="--extern":
        config["externlinks"].append(re.compile(arg))

    elif opt=="-h" or opt=="--help":
        printHelp()

    elif opt=="-o" or opt=="--output":
        if linkcheck.Config.Loggers.has_key(arg):
            config["log"] = linkcheck.Config.Loggers[arg]()
        else:
            printUsage("Legal output arguments are "+linkcheck.Config.LoggerKeys+".")

    elif opt=="-W" or opt=="--file-output":
        # same logger registry as -o, but writing to pylice-out.<name>
        if linkcheck.Config.Loggers.has_key(arg):
            config["fileoutput"].append(linkcheck.Config.Loggers[arg](open("pylice-out."+arg, "w")))
        else:
            printUsage("Legal output arguments are "+linkcheck.Config.LoggerKeys+".")

    elif opt=="-i" or opt=="--intern":
        config["internlinks"].append(re.compile(arg))

    elif opt=="-l" or opt=="--allowdeny":
        config["allowdeny"] = 1

    elif opt=="-P" or opt=="--proxy":
        # accept "host:port" or plain "host"
        proxy = re.compile("(.+):(.+)").match(arg)
        if proxy:
            config["proxy"] = proxy.group(1)
            config["proxyport"] = int(proxy.group(2))
        else:
            config["proxy"] = arg

    elif opt=="-p" or opt=="--password":
        config["password"]=arg
        constructAuth=constructAuth+1

    elif opt=="-q" or opt=="--quiet":
        config["quiet"]=1

    elif opt=="-r" or opt=="--recursion-level":
        if int(arg) >= 0:
            config["recursionlevel"] = int(arg)
        else:
            printUsage("Illegal recursion-level number: "+arg)

    elif opt=="-R" or opt=="--robots-txt":
        config["robotstxt"] = 1

    elif opt=="-s" or opt=="--strict":
        config["strict"] = 1

    elif opt=="-t" or opt=="--threads":
        num = int(arg)
        # only meaningful when the platform supports threading at all
        if config["threads"]:
            if num>0:
                config.enableThreading(num)
            else:
                config.disableThreading()

    elif opt=="-u" or opt=="--user":
        config["user"] = arg
        constructAuth=constructAuth+1

    elif opt=="-V" or opt=="--version":
        printVersion()

    elif opt=="-v" or opt=="--verbose":
        # verbose implies warnings
        config["verbose"] = 1
        config["warnings"] = 1

    elif opt=="-w" or opt=="--warnings":
        config["warnings"] = 1

if constructAuth and constructAuth!=2:
    sys.stderr.write("Warning: try to give me both Username and Password\n")

if len(args)==0:
    printUsage("no files or urls given")

# syntactic sugar: bare www./ftp. host names get their scheme prepended
for url in args:
    if not (":" in url):
        if re.compile("^ftp\.").match(url):
            url = "ftp://"+url
        elif re.compile("^www\.").match(url):
            url = "http://"+url
    config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0))

linkcheck.checkUrls(config)
|
||||
8
linkchecker.bat
Normal file
8
linkchecker.bat
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
@echo off

rem Windows launcher for the linkchecker "pylice" script.
rem === adjust vars below ===
set PYTHON=c:\progra~1\python\python.exe
set LINKCHECKER=c:\progra~1\linkchecker-1.1.0
rem === end configure ===

rem forward up to nine command line arguments to the python script
%PYTHON% %LINKCHECKER%\pylice %1 %2 %3 %4 %5 %6 %7 %8 %9
|
||||
24
linkcheckerrc
Normal file
24
linkcheckerrc
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# sample resource file
|
||||
# see linkchecker -h for help on these options
|
||||
|
||||
[output]
|
||||
#debug=1
|
||||
#log=colored
|
||||
#verbose=1
|
||||
#warnings=1
|
||||
#quiet=0
|
||||
|
||||
[checking]
|
||||
#threads=15
|
||||
#anchors=1
|
||||
#externlinks=
|
||||
#internlinks=
|
||||
#allowdeny=1
|
||||
#password=calvin@
|
||||
#user=anonymous
|
||||
#recursionlevel=1
|
||||
#robotstxt=1
|
||||
#strict=1
|
||||
#proxy=
|
||||
#proxyport=8080
|
||||
|
||||
26
parsetest.py
Normal file
26
parsetest.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import sys,StringIO,LinkChecker
|
||||
|
||||
def linkcheck(urls):
    """Check a list of http://, file:// etc. urls with default options."""
    config = LinkChecker.Config.Configuration()
    # log every checked URL and include warnings; all other options
    # keep their default values
    config["verbose"] = 1
    config["warnings"] = 1
    # queue each url at recursion level 0, then run the checker
    for target in urls:
        config.appendUrl(LinkChecker.UrlData.GetUrlDataFrom(target, 0))
    LinkChecker.checkUrls(config)
|
||||
|
||||
# Capture everything the checker prints on stdout, feed it back through
# the OutputReader parser and print the parsed result objects.
old_stdout = sys.stdout
sys.stdout = StringIO.StringIO()
linkcheck(['http://fsinfo.cs.uni-sb.de/~calvin'])
sys.stdout.seek(0)
reader = LinkChecker.OutputReader.OutputReader()
# echo the raw checker output to the real stdout
old_stdout.write(sys.stdout.getvalue())
result = reader.parse(sys.stdout)
# restore the real stdout before printing the parse results
sys.stdout = old_stdout
for url in result:
    print str(url)
|
||||
1
test/.cvsignore
Normal file
1
test/.cvsignore
Normal file
|
|
@ -0,0 +1 @@
|
|||
*.result
|
||||
6
test/base1.html
Normal file
6
test/base1.html
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
<!-- base testing -->
|
||||
|
||||
<base target="_top">
|
||||
<a href
|
||||
=
|
||||
"file:/etc">
|
||||
3
test/base2.html
Normal file
3
test/base2.html
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
<base href="file:/etc/">
|
||||
<a href="passwd">
|
||||
|
||||
2
test/base3.html
Normal file
2
test/base3.html
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
<base href="http://treasure.calvinsplayground.de/~calvin/">
|
||||
<a href="index.shtml">
|
||||
4
test/frames.html
Normal file
4
test/frames.html
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
<frameset border="0" frameborder="0" framespacing="0">
|
||||
<frame name="top" src="test1.html" frameborder="0">
|
||||
<frame name="bottom" src="test2.html" frameborder="0">
|
||||
</frameset>
|
||||
17
test/test1.html
Normal file
17
test/test1.html
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
Just some HTTP links
|
||||
<a href="http://www.garantiertnixgutt.bla">
|
||||
<a href="http://www.heise.de">
|
||||
<a href="http:/www.heise.de">
|
||||
<a href="http:www.heise.de">
|
||||
<a href="http://">
|
||||
<a href="http:/">
|
||||
<a href="http:">
|
||||
<a name="iswas"> <!-- anchor for test2.html -->
|
||||
<a href=http://slashdot.org/>
|
||||
<a href="http://treasure.calvinsplayground.de/~calvin/software/#isnix">
|
||||
<a href="https://www.heise.de"> <!-- ignore -->
|
||||
<a href="HtTP://WWW.hEIsE.DE">
|
||||
<a href="HTTP://WWW.HEISE.DE"> <!-- should be cached -->
|
||||
<!-- <a href=http://nocheckin> -->
|
||||
<!-- check the parser at end of file -->
|
||||
<a href="g
|
||||
23
test/test2.html
Normal file
23
test/test2.html
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
<meta http-equiv="refresh" content="5; url=http://localhost">
|
||||
<a href="hutzli:nixgutt">
|
||||
<a href="javascript:loadthis()">
|
||||
<a href="file:///etc/group">
|
||||
<a href="file://etc/group">
|
||||
<a href="file:/etc/group">
|
||||
<a href="file:etc/group">
|
||||
<a href="file:/etc/">
|
||||
<a href="test1.html">
|
||||
<a href="test1.html#isnix">
|
||||
<a href="test1.html#iswas">
|
||||
<a href=mailto:calvin@localhost?subject=Hallo Pfuscher>
|
||||
<a href=mailto:Bastian Kleineidam <calvin@host1?foo=bar>>
|
||||
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>">
|
||||
<a href="mailto:calvin@host3">
|
||||
<a href="mailto:">
|
||||
<a href="telnet:localhost">
|
||||
<a href="telnet:">
|
||||
<a href="ftp:/treasure.calvinsplayground.de/pub">
|
||||
<a href="ftp://treasure.calvinsplayground.de/pub">
|
||||
<a href="ftp://treasure.calvinsplayground.de//pub">
|
||||
<a href="ftp://treasure.calvinsplayground.de////////pub">
|
||||
<a href="ftp:///treasure.calvinsplayground.de/pub">
|
||||
22
tests/test.py
Executable file
22
tests/test.py
Executable file
|
|
@ -0,0 +1,22 @@
|
|||
#!/opt/python/bin/python1.5

import DNS
# automatically load nameserver(s) from /etc/resolv.conf
# (works on unix - on others, YMMV)
DNS.ParseResolvConf()

# lets do an all-in-one request
# set up the request object
r = DNS.DnsRequest(name='munnari.oz.au',qtype='A')
# do the request
a=r.req()
# and do a pretty-printed output
a.show()

# now lets setup a reusable request object
r = DNS.DnsRequest(qtype='ANY')
# BUGFIX: "a.root-servers.nex" looks like a typo for the real
# root-servers.net zone, so this lookup could never succeed
res = r.req("a.root-servers.net",qtype='ANY')
res.show()
res = r.req("proxy.connect.com.au")
res.show()
|
||||
|
||||
17
tests/test2.py
Executable file
17
tests/test2.py
Executable file
|
|
@ -0,0 +1,17 @@
|
|||
#!/opt/python/bin/python1.5

import DNS
# automatically load nameserver(s) from /etc/resolv.conf
# (works on unix - on others, YMMV)
DNS.ParseResolvConf()

# look up the mail exchangers for the domain
r=DNS.Request(qtype='mx')
res = r.req('connect.com.au')
res.show()

# fresh request object for the SOA record of the same domain
r=DNS.Request(qtype='soa')
res = r.req('connect.com.au')
res.show()

# reverse lookup: map an IP address back to a host name
print DNS.revlookup('192.189.54.17')
|
||||
|
||||
13
tests/test3.py
Executable file
13
tests/test3.py
Executable file
|
|
@ -0,0 +1,13 @@
|
|||
#!/opt/python/bin/python1.5
|
||||
|
||||
import DNS
|
||||
# automatically load nameserver(s) from /etc/resolv.conf
|
||||
# (works on unix - on others, YMMV)
|
||||
DNS.ParseResolvConf()
|
||||
|
||||
# web server reliability, the NT way. *snigger*
|
||||
res = r.req('www.microsoft.com',qtype='A')
|
||||
# res.answers is a list of dictionaries of answers
|
||||
print len(res.answers),'different A records'
|
||||
# each of these has an entry for 'data', which is the result.
|
||||
print map(lambda x:x['data'], res.answers)
|
||||
7
tests/test4.py
Executable file
7
tests/test4.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/opt/python/bin/python

import DNS

# load nameserver(s) from /etc/resolv.conf, as in the other test scripts
DNS.ParseResolvConf()

# print the mail exchanger list for the domain
print DNS.mxlookup("connect.com.au")
|
||||
52
tests/test5.py
Executable file
52
tests/test5.py
Executable file
|
|
@ -0,0 +1,52 @@
|
|||
#!/opt/python/bin/python
|
||||
|
||||
import DNS
|
||||
DNS.ParseResolvConf()
|
||||
|
||||
def Error(mesg):
    """Print an error banner plus mesg to stdout, exit with status 1."""
    import sys
    sys.stdout.write("%s ERROR:\n" % sys.argv[0])
    sys.stdout.write("%s\n" % (mesg,))
    sys.exit(1)
|
||||
|
||||
def main():
    """Look up the nameservers for the domain given on the command line
    and verify each one against the primary's answer."""
    import sys
    if len(sys.argv) != 2:
        Error("usage: %s somedomain.com"%sys.argv[0])
    domain = sys.argv[1]
    nslist = GetNS(domain)
    print "According to the primary, the following are nameservers for this domain"
    for ns in nslist:
        print " ",ns
        CheckNS(ns,domain)
|
||||
|
||||
|
||||
def GetNS(domain):
|
||||
import DNS
|
||||
r = DNS.Request(domain,qtype='SOA').req()
|
||||
if r.header['status'] != 'NOERROR':
|
||||
Error("received status of %s when attempting to look up SOA for domain"%
|
||||
(r.header['status']))
|
||||
primary,email,serial,refresh,retry,expire,minimum = r.answers[0]['data']
|
||||
print "Primary nameserver for domain %s is: %s"%(domain,primary)
|
||||
r = DNS.Request(domain,qtype='NS',server=primary,aa=1).req()
|
||||
if r.header['status'] != 'NOERROR':
|
||||
Error("received status of %s when attempting to query %s for NSs"%
|
||||
(r.header['status']))
|
||||
if r.header['aa'] != 1:
|
||||
Error("primary NS %s doesn't believe that it's authoritative!"% primary)
|
||||
nslist = map(lambda x:x['data'], r.answers)
|
||||
return nslist
|
||||
|
||||
def CheckNS(nameserver,domain):
|
||||
r = DNS.Request(domain,qtype='SOA',server=nameserver,aa=1).req()
|
||||
if r.header['status'] != 'NOERROR':
|
||||
Error("received status of %s when attempting to query %s for NS"%
|
||||
(r.header['status']))
|
||||
if r.header['aa'] != 1:
|
||||
Error("NS %s doesn't believe that it's authoritative!"% nameserver)
|
||||
primary,email,serial,refresh,retry,expire,minimum = r.answers[0]['data']
|
||||
print " NS has serial",serial[1]
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Reference in a new issue