commit 0329ca7682f735fab3fc17281a2d4c59876cabbb Author: calvin Date: Sat Feb 26 10:24:46 2000 +0000 Initial revision git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@5 e7d03fd6-7b0d-0410-9947-9c21f3af8025 diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 00000000..558a80bb --- /dev/null +++ b/.cvsignore @@ -0,0 +1,3 @@ +build-stamp +sample.html +linkchecker-out.* diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..36a10cab --- /dev/null +++ b/ChangeLog @@ -0,0 +1,148 @@ +25.2.2000 + * changed the name to LinkChecker. My old Java LinkChecker will + disappear because I do not maintain it anymore. + +21.2.2000 + * add -q, --quiet option + * convert all url host to lowercase + * log the download time for urls + +20.2.2000 + * add Graph Modelling Language (GML) output for sitemaps + * add SQL output + +19.2.2000 + * second try with HTTP/1.1: additionally close response + * remove deprecated options + * new option -W, --file-output + * fix typo for --password option + +18.2.2000 + * add "-" to mail adress syntax (Baz ) + * fix typo in pylice (David J. MacKenzie ) + +10.2.2000 Version 0.8.0 + * clean the CVS dir + * fixes for configuration + * first version of configuration parsing + +9.2.2000 + * do not pass anchor in HTTP requests + * fixes for configuration parsing + +8.2.2000 + * fixed bad finished_NoThreads function + * backed out HTTP/1.1 support. This library is buggy and + does not close some filehandles. 
Eventually you will get + a "Too many open files" error + * strip whitespace from parsed urls + +6.2.2000 + * fixed some bugs, the test suite is running again + +5.2.2000 + * made "LinkChecker" module + * configuration is dynamic; no more class variables + * print line number + * more agressive closing of filehandles + +27.1.2000 Version 0.7.0 + * put pylicerc in /etc for .deb package + * HTTP/1.1 support with httplib.py from Greg Stein + * DNS MX lookup for mail adresses + use the DNS module from Guido van Rossum and Anthony Baxter + MX lookup was a suggestion to LinkChecker from + Jimmy Engelbrecht + +26.1.2000 Version 0.6.2 + * refined HTML link syntax to handle non-quoted URLs + * fix: set urlTuple to None if we cannot check anchors + * fixed anchor checking again + +25.1.2000 Version 0.6.1 + * fixed the HTML link syntax + +24.1.2000 + * fix: -e option did not work properly + * fix: reenabled LinkChecker Online, updated to 0.6.0 + +21.1.2000 Version 0.6.0 + * fix: add hostname for relative redirections + * Added TODO list + +20.1.2000 + * Added documentation for the LinkChecker class + +19.1.2000 + * HTTP Proxy support + * CGI logging + +18.1.2000 Version 0.5.0 + * anchor checking in local HTML files + * configuration file + * HTTP Authorization support + * Send HTTP HEAD method to check and GET method to get contents + * Still missing: Proxy support (including HTTP status code 305) + +17.1.2000 + * cut parameter, query and fragment of local file names + * limit number of redirections to 5 + +14.1.2000 Version 0.4.3 + * pylice.bat fix: now it really works + * fix for local Windows file arguments + +14.1.2000 Version 0.4.2 + * StringUtil.indentWith: use string multiplying + * Still missing: HTTP authorization and Proxy support + * pylice.bat fix: pass parameters + +13.1.2000 Version 0.4.1 + * Windows python.bat script + * installation updates + * additional .zip package for Windows + +12.1.2000 Version 0.4.0 + * fixed LinkChecker.NumThreads setting: if the 
platform + does not support threading, it is disabled automagically + * robots.txt parsing + * split up UrlData.py + * simplified option parsing + * strip optional quotes from urls + * use quit() not close() to disconnect from FTP servers + +11.1.2000 Version 0.3.0 + * try to finger for mailto: links + * try to connect for telnet: links + * removed time.sleep(1) commands, they are not necessary + * restrict CGI to recursion level 3 + * make UrlCache and RobotsTxtCache thread safe + * fixed the 'No more open files' bug by closing all connections + * fixed thread synchronization in LinkChecker while loop + * you can specify -t 0 on the commandline to disable threading + * STILL MISSING: + HTTP authorization, Proxy and robots.txt parsing + +10.1.2000 Version 0.2.0 + * configure option to disable threading: LinkChecker.threadsupport + * do not rely on self.mime in HttpUrlData, this could be None + * flush stdout after each log entry + * use LinkChecker.User and LinkChecker.Password in FTP connections + * make sure redirection is not cyclic + +9.1.2000 Version 0.1.0 + * HTTP request + * FTP request + * fixed MaxRecursionLevel setting + * fixed name clash of variable and function warning + * ColoredLogger + * small doc changes + * CGI and HTML files for LinkChecker Online, + but I still have to install Python on my http server + (will try this tomorrow) + +8.1.2000 + * Properties, Threader, LinkChecker, UrlData, Logging + +7.1.2000 Version 0.0.1 + * Option processing diff --git a/DNS/Base.py b/DNS/Base.py new file mode 100644 index 00000000..f464e26f --- /dev/null +++ b/DNS/Base.py @@ -0,0 +1,215 @@ +# $Id$ +import sys +import getopt +import socket +import string +import DNS,DNS.Lib,DNS.Type,DNS.Class,DNS.Opcode +#import asyncore + +defaults= { 'protocol':'udp', 'port':53, 'opcode':DNS.Opcode.QUERY, + 'qtype':DNS.Type.A, 'rd':1, 'timing':1 } + +defaults['server']=[] + +def ParseResolvConf(): + "parses the /etc/resolv.conf file and sets defaults for name servers" + import 
string + global defaults + lines=open("/etc/resolv.conf").readlines() + for line in lines: + string.strip(line) + if line[0]==';' or line[0]=='#': + continue + fields=string.split(line) + if fields[0]=='domain': + defaults['domain']=fields[1] + if fields[0]=='search': + pass + if fields[0]=='options': + pass + if fields[0]=='sortlist': + pass + if fields[0]=='nameserver': + defaults['server'].append(fields[1]) + + + +class DnsRequest: + def __init__(self,*name,**args): + self.donefunc=None + self.async=None + self.defaults = {} + self.argparse(name,args) + self.defaults = self.args + + def argparse(self,name,args): + if not name and self.defaults.has_key('name'): + args['name'] = self.defaults['name'] + if type(name) is type(""): + args['name']=name + else: + if len(name) == 1: + if name[0]: + args['name']=name[0] + for i in defaults.keys(): + if not args.has_key(i): + if self.defaults.has_key(i): + args[i]=self.defaults[i] + else: + args[i]=defaults[i] + if type(args['server']) == type(''): + args['server'] = [args['server']] + self.args=args + + def socketInit(self,a,b): + import socket + self.s = socket.socket(a,b) + + def processUDPReply(self): + import time + self.reply = self.s.recv(1024) + self.time_finish=time.time() + self.args['server']=self.ns + return self.processReply() + + def processTCPReply(self): + import time + self.f = self.s.makefile('r') + header = self.f.read(2) + if len(header) < 2: + raise DNS.Error,'EOF' + count = DNS.Lib.unpack16bit(header) + self.reply = self.f.read(count) + if len(self.reply) != count: + raise DNS.Error,'incomplete reply' + self.time_finish=time.time() + self.args['server']=self.ns + return self.processReply() + + def processReply(self): + import time + self.args['elapsed']=(self.time_finish-self.time_start)*1000 + u = DNS.Lib.Munpacker(self.reply) + r=DNS.Lib.DnsResult(u,self.args) + r.args=self.args + #self.args=None # mark this DnsRequest object as used. 
+ return r + #### TODO TODO TODO #### + if protocol == 'tcp' and qtype == DNS.Type.AXFR: + while 1: + header = f.read(2) + if len(header) < 2: + print '========== EOF ==========' + break + count = DNS.Lib.unpack16bit(header) + if not count: + print '========== ZERO COUNT ==========' + break + print '========== NEXT ==========' + reply = f.read(count) + if len(reply) != count: + print '*** Incomplete reply ***' + break + u = DNS.Lib.Munpacker(reply) + DNS.Lib.dumpM(u) + + def conn(self): + self.s.connect((self.ns,self.port)) + + def req(self,*name,**args): + import time,sys + self.argparse(name,args) + #if not self.args: + # raise DNS.Error,'reinitialize request before reuse' + protocol = self.args['protocol'] + self.port = self.args['port'] + opcode = self.args['opcode'] + rd = self.args['rd'] + server=self.args['server'] + if type(self.args['qtype']) == type('foo'): + try: + qtype = eval(string.upper(self.args['qtype']), DNS.Type.__dict__) + except (NameError,SyntaxError): + raise DNS.Error,'unknown query type' + else: + qtype=self.args['qtype'] + if not self.args.has_key('name'): + print self.args + raise DNS.Error,'nothing to lookup' + qname = self.args['name'] + if qtype == DNS.Type.AXFR: + print 'Query type AXFR, protocol forced to TCP' + protocol = 'tcp' + #print 'QTYPE %d(%s)' % (qtype, DNS.Type.typestr(qtype)) + m = DNS.Lib.Mpacker() + m.addHeader(0, + 0, opcode, 0, 0, rd, 0, 0, 0, + 1, 0, 0, 0) + m.addQuestion(qname, qtype, DNS.Class.IN) + self.request = m.getbuf() + if protocol == 'udp': + self.response=None + self.socketInit(socket.AF_INET, socket.SOCK_DGRAM) + for self.ns in server: + try: + #self.s.connect((self.ns, self.port)) + self.conn() + self.time_start=time.time() + if not self.async: + self.s.send(self.request) + self.response=self.processUDPReply() + #except socket.error: + except None: + continue + break + if not self.response: + if not self.async: + raise DNS.Error,'no working nameservers found' + else: + self.response=None + for self.ns in 
server: + try: + self.socketInit(socket.AF_INET, socket.SOCK_STREAM) + self.time_start=time.time() + self.conn() + self.s.send(DNS.Lib.pack16bit(len(self.request)) + self.request) + self.s.shutdown(1) + self.response=self.processTCPReply() + except socket.error: + continue + break + if not self.response: + raise DNS.Error,'no working nameservers found' + if not self.async: + return self.response + +#class DnsAsyncRequest(DnsRequest,asyncore.dispatcher_with_send): +class DnsAsyncRequest(DnsRequest): + def __init__(self,*name,**args): + if args.has_key('done') and args['done']: + self.donefunc=args['done'] + else: + self.donefunc=self.showResult + self.realinit(name,args) + self.async=1 + def conn(self): + import time + self.connect(self.ns,self.port) + self.time_start=time.time() + if self.args.has_key('start') and self.args['start']: + asyncore.dispatcher.go(self) + def socketInit(self,a,b): + self.create_socket(a,b) + asyncore.dispatcher.__init__(self) + self.s=self + def handle_read(self): + if self.args['protocol'] == 'udp': + self.response=self.processUDPReply() + if self.donefunc: + apply(self.donefunc,(self,)) + def handle_connect(self): + self.send(self.request) + def handle_write(self): + pass + def showResult(self,*s): + self.response.show() diff --git a/DNS/Class.py b/DNS/Class.py new file mode 100644 index 00000000..f90b9e84 --- /dev/null +++ b/DNS/Class.py @@ -0,0 +1,23 @@ +# CLASS values (section 3.2.4) + +IN = 1 # the Internet +CS = 2 # the CSNET class (Obsolete - used only for examples in + # some obsolete RFCs) +CH = 3 # the CHAOS class +HS = 4 # Hesiod [Dyer 87] + +# QCLASS values (section 3.2.5) + +ANY = 255 # any class + + +# Construct reverse mapping dictionary + +_names = dir() +classmap = {} +for _name in _names: + if _name[0] != '_': classmap[eval(_name)] = _name + +def classstr(klass): + if classmap.has_key(klass): return classmap[klass] + else: return `klass` diff --git a/DNS/Lib.py b/DNS/Lib.py new file mode 100644 index 00000000..111fe0c6 
--- /dev/null +++ b/DNS/Lib.py @@ -0,0 +1,589 @@ +# Domain Name Server (DNS) interface +# +# See RFC 1035: +# ------------------------------------------------------------------------ +# Network Working Group P. Mockapetris +# Request for Comments: 1035 ISI +# November 1987 +# Obsoletes: RFCs 882, 883, 973 +# +# DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION +# ------------------------------------------------------------------------ + + +import string + +import DNS.Type +import DNS.Class +import DNS.Opcode +import DNS.Status + + +# Low-level 16 and 32 bit integer packing and unpacking + +def pack16bit(n): + return chr((n>>8)&0xFF) + chr(n&0xFF) + +def pack32bit(n): + return chr((n>>24)&0xFF) + chr((n>>16)&0xFF) \ + + chr((n>>8)&0xFF) + chr(n&0xFF) + +def unpack16bit(s): + return (ord(s[0])<<8) | ord(s[1]) + +def unpack32bit(s): + return (ord(s[0])<<24) | (ord(s[1])<<16) \ + | (ord(s[2])<<8) | ord(s[3]) + +def addr2bin(addr): + if type(addr) == type(0): + return addr + bytes = string.splitfields(addr, '.') + if len(bytes) != 4: raise ValueError, 'bad IP address' + n = 0 + for byte in bytes: n = n<<8 | string.atoi(byte) + return n + +def bin2addr(n): + return '%d.%d.%d.%d' % ((n>>24)&0xFF, (n>>16)&0xFF, + (n>>8)&0xFF, n&0xFF) + + +# Packing class + +class Packer: + def __init__(self): + self.buf = '' + self.index = {} + def getbuf(self): + return self.buf + def addbyte(self, c): + if len(c) != 1: raise TypeError, 'one character expected' + self.buf = self.buf + c + def addbytes(self, bytes): + self.buf = self.buf + bytes + def add16bit(self, n): + self.buf = self.buf + pack16bit(n) + def add32bit(self, n): + self.buf = self.buf + pack32bit(n) + def addaddr(self, addr): + n = addr2bin(addr) + self.buf = self.buf + pack32bit(n) + def addstring(self, s): + self.addbyte(chr(len(s))) + self.addbytes(s) + def addname(self, name): + # Domain name packing (section 4.1.4) + # Add a domain name to the buffer, possibly using pointers. 
+ # The case of the first occurrence of a name is preserved. + # Redundant dots are ignored. + list = [] + for label in string.splitfields(name, '.'): + if label: + if len(label) > 63: + raise PackError, 'label too long' + list.append(label) + keys = [] + for i in range(len(list)): + key = string.upper(string.joinfields(list[i:], '.')) + keys.append(key) + if self.index.has_key(key): + pointer = self.index[key] + break + else: + i = len(list) + pointer = None + # Do it into temporaries first so exceptions don't + # mess up self.index and self.buf + buf = '' + offset = len(self.buf) + index = [] + for j in range(i): + label = list[j] + n = len(label) + if offset + len(buf) < 0x3FFF: + index.append(keys[j], offset + len(buf)) + else: + print 'DNS.Lib.Packer.addname:', + print 'warning: pointer too big' + buf = buf + (chr(n) + label) + if pointer: + buf = buf + pack16bit(pointer | 0xC000) + else: + buf = buf + '\0' + self.buf = self.buf + buf + for key, value in index: + self.index[key] = value + def dump(self): + keys = self.index.keys() + keys.sort() + print '-'*40 + for key in keys: + print '%20s %3d' % (key, self.index[key]) + print '-'*40 + space = 1 + for i in range(0, len(self.buf)+1, 2): + if self.buf[i:i+2] == '**': + if not space: print + space = 1 + continue + space = 0 + print '%4d' % i, + for c in self.buf[i:i+2]: + if ' ' < c < '\177': + print ' %c' % c, + else: + print '%2d' % ord(c), + print + print '-'*40 + + +# Unpacking class + +UnpackError = 'DNS.Lib.UnpackError' # Exception + +class Unpacker: + def __init__(self, buf): + self.buf = buf + self.offset = 0 + def getbyte(self): + c = self.buf[self.offset] + self.offset = self.offset + 1 + return c + def getbytes(self, n): + s = self.buf[self.offset : self.offset + n] + if len(s) != n: raise UnpackError, 'not enough data left' + self.offset = self.offset + n + return s + def get16bit(self): + return unpack16bit(self.getbytes(2)) + def get32bit(self): + return unpack32bit(self.getbytes(4)) + def 
getaddr(self): + return bin2addr(self.get32bit()) + def getstring(self): + return self.getbytes(ord(self.getbyte())) + def getname(self): + # Domain name unpacking (section 4.1.4) + c = self.getbyte() + i = ord(c) + if i & 0xC0 == 0xC0: + d = self.getbyte() + j = ord(d) + pointer = ((i<<8) | j) & ~0xC000 + save_offset = self.offset + try: + self.offset = pointer + domain = self.getname() + finally: + self.offset = save_offset + return domain + if i == 0: + return '' + domain = self.getbytes(i) + remains = self.getname() + if not remains: + return domain + else: + return domain + '.' + remains + + +# Test program for packin/unpacking (section 4.1.4) + +def testpacker(): + N = 25 + R = range(N) + import timing + # See section 4.1.4 of RFC 1035 + timing.start() + for i in R: + p = Packer() + p.addbytes('*' * 20) + p.addname('f.ISI.ARPA') + p.addbytes('*' * 8) + p.addname('Foo.F.isi.arpa') + p.addbytes('*' * 18) + p.addname('arpa') + p.addbytes('*' * 26) + p.addname('') + timing.finish() + print round(timing.milli() * 0.001 / N, 3), 'seconds per packing' + p.dump() + u = Unpacker(p.buf) + u.getbytes(20) + u.getname() + u.getbytes(8) + u.getname() + u.getbytes(18) + u.getname() + u.getbytes(26) + u.getname() + timing.start() + for i in R: + u = Unpacker(p.buf) + res = (u.getbytes(20), + u.getname(), + u.getbytes(8), + u.getname(), + u.getbytes(18), + u.getname(), + u.getbytes(26), + u.getname()) + timing.finish() + print round(timing.milli() * 0.001 / N, 3), 'seconds per unpacking' + for item in res: print item + + +# Pack/unpack RR toplevel format (section 3.2.1) + +class RRpacker(Packer): + def __init__(self): + Packer.__init__(self) + self.rdstart = None + def addRRheader(self, name, type, klass, ttl, *rest): + self.addname(name) + self.add16bit(type) + self.add16bit(klass) + self.add32bit(ttl) + if rest: + if res[1:]: raise TypeError, 'too many args' + rdlength = rest[0] + else: + rdlength = 0 + self.add16bit(rdlength) + self.rdstart = len(self.buf) + def 
patchrdlength(self): + rdlength = unpack16bit(self.buf[self.rdstart-2:self.rdstart]) + if rdlength == len(self.buf) - self.rdstart: + return + rdata = self.buf[self.rdstart:] + save_buf = self.buf + ok = 0 + try: + self.buf = self.buf[:self.rdstart-2] + self.add16bit(len(rdata)) + self.buf = self.buf + rdata + ok = 1 + finally: + if not ok: self.buf = save_buf + def endRR(self): + if self.rdstart is not None: + self.patchrdlength() + self.rdstart = None + def getbuf(self): + if self.rdstart is not None: self.patchrdlenth() + return Packer.getbuf(self) + # Standard RRs (section 3.3) + def addCNAME(self, name, klass, ttl, cname): + self.addRRheader(name, DNS.Type.CNAME, klass, ttl) + self.addname(cname) + self.endRR() + def addHINFO(self, name, klass, ttl, cpu, os): + self.addRRheader(name, DNS.Type.HINFO, klass, ttl) + self.addstring(cpu) + self.addstring(os) + self.endRR() + def addMX(self, name, klass, ttl, preference, exchange): + self.addRRheader(name, DNS.Type.MX, klass, ttl) + self.add16bit(preference) + self.addname(exchange) + self.endRR() + def addNS(self, name, klass, ttl, nsdname): + self.addRRheader(name, DNS.Type.NS, klass, ttl) + self.addname(nsdname) + self.endRR() + def addPTR(self, name, klass, ttl, ptrdname): + self.addRRheader(name, DNS.Type.PTR, klass, ttl) + self.addname(ptrdname) + self.endRR() + def addSOA(self, name, klass, ttl, + mname, rname, serial, refresh, retry, expire, minimum): + self.addRRheader(name, DNS.Type.SOA, klass, ttl) + self.addname(mname) + self.addname(rname) + self.add32bit(serial) + self.add32bit(refresh) + self.add32bit(retry) + self.add32bit(expire) + self.add32bit(minimum) + self.endRR() + def addTXT(self, name, klass, ttl, list): + self.addRRheader(name, DNS.Type.TXT, klass, ttl) + for txtdata in list: + self.addstring(txtdata) + self.endRR() + # Internet specific RRs (section 3.4) -- class = IN + def addA(self, name, ttl, address): + self.addRRheader(name, DNS.Type.A, DNS.Class.IN, ttl) + self.addaddr(address) + 
self.endRR() + def addWKS(self, name, ttl, address, protocol, bitmap): + self.addRRheader(name, DNS.Type.WKS, DNS.Class.IN, ttl) + self.addaddr(address) + self.addbyte(chr(protocol)) + self.addbytes(bitmap) + self.endRR() + +def prettyTime(seconds): + if seconds<60: + return seconds,"%d seconds"%(seconds) + if seconds<3600: + return seconds,"%d minutes"%(seconds/60) + if seconds<86400: + return seconds,"%d hours"%(seconds/3600) + if seconds<604800: + return seconds,"%d days"%(seconds/86400) + else: + return seconds,"%d weeks"%(seconds/604800) + + +class RRunpacker(Unpacker): + def __init__(self, buf): + Unpacker.__init__(self, buf) + self.rdend = None + def getRRheader(self): + name = self.getname() + type = self.get16bit() + klass = self.get16bit() + ttl = self.get32bit() + rdlength = self.get16bit() + self.rdend = self.offset + rdlength + return (name, type, klass, ttl, rdlength) + def endRR(self): + if self.offset != self.rdend: + raise UnpackError, 'end of RR not reached' + def getCNAMEdata(self): + return self.getname() + def getHINFOdata(self): + return self.getstring(), self.getstring() + def getMXdata(self): + return self.get16bit(), self.getname() + def getNSdata(self): + return self.getname() + def getPTRdata(self): + return self.getname() + def getSOAdata(self): + return self.getname(), \ + self.getname(), \ + ('serial',)+(self.get32bit(),), \ + ('refresh ',)+prettyTime(self.get32bit()), \ + ('retry',)+prettyTime(self.get32bit()), \ + ('expire',)+prettyTime(self.get32bit()), \ + ('minimum',)+prettyTime(self.get32bit()) + def getTXTdata(self): + list = [] + while self.offset != self.rdend: + list.append(self.getstring()) + return list + def getAdata(self): + return self.getaddr() + def getWKSdata(self): + address = self.getaddr() + protocol = ord(self.getbyte()) + bitmap = self.getbytes(self.rdend - self.offset) + return address, protocol, bitmap + + +# Pack/unpack Message Header (section 4.1) + +class Hpacker(Packer): + def addHeader(self, id, qr, 
opcode, aa, tc, rd, ra, z, rcode, + qdcount, ancount, nscount, arcount): + self.add16bit(id) + self.add16bit((qr&1)<<15 | (opcode*0xF)<<11 | (aa&1)<<10 + | (tc&1)<<9 | (rd&1)<<8 | (ra&1)<<7 + | (z&7)<<4 | (rcode&0xF)) + self.add16bit(qdcount) + self.add16bit(ancount) + self.add16bit(nscount) + self.add16bit(arcount) + +class Hunpacker(Unpacker): + def getHeader(self): + id = self.get16bit() + flags = self.get16bit() + qr, opcode, aa, tc, rd, ra, z, rcode = ( + (flags>>15)&1, + (flags>>11)&0xF, + (flags>>10)&1, + (flags>>9)&1, + (flags>>8)&1, + (flags>>7)&1, + (flags>>4)&7, + (flags>>0)&0xF) + qdcount = self.get16bit() + ancount = self.get16bit() + nscount = self.get16bit() + arcount = self.get16bit() + return (id, qr, opcode, aa, tc, rd, ra, z, rcode, + qdcount, ancount, nscount, arcount) + + +# Pack/unpack Question (section 4.1.2) + +class Qpacker(Packer): + def addQuestion(self, qname, qtype, qclass): + self.addname(qname) + self.add16bit(qtype) + self.add16bit(qclass) + +class Qunpacker(Unpacker): + def getQuestion(self): + return self.getname(), self.get16bit(), self.get16bit() + + +# Pack/unpack Message(section 4) +# NB the order of the base classes is important for __init__()! + +class Mpacker(RRpacker, Qpacker, Hpacker): + pass + +class Munpacker(RRunpacker, Qunpacker, Hunpacker): + pass + + +# Routines to print an unpacker to stdout, for debugging. +# These affect the unpacker's current position! + +def dumpM(u): + print 'HEADER:', + (id, qr, opcode, aa, tc, rd, ra, z, rcode, + qdcount, ancount, nscount, arcount) = u.getHeader() + print 'id=%d,' % id, + print 'qr=%d, opcode=%d, aa=%d, tc=%d, rd=%d, ra=%d, z=%d, rcode=%d,' \ + % (qr, opcode, aa, tc, rd, ra, z, rcode) + if tc: print '*** response truncated! ***' + if rcode: print '*** nonzero error code! 
(%d) ***' % rcode + print ' qdcount=%d, ancount=%d, nscount=%d, arcount=%d' \ + % (qdcount, ancount, nscount, arcount) + for i in range(qdcount): + print 'QUESTION %d:' % i, + dumpQ(u) + for i in range(ancount): + print 'ANSWER %d:' % i, + dumpRR(u) + for i in range(nscount): + print 'AUTHORITY RECORD %d:' % i, + dumpRR(u) + for i in range(arcount): + print 'ADDITIONAL RECORD %d:' % i, + dumpRR(u) + +class DnsResult: + + def __init__(self,u,args): + self.header={} + self.questions=[] + self.answers=[] + self.authority=[] + self.additional=[] + self.args=args + self.storeM(u) + + def show(self): + import time + print '; <<>> PDG.py 1.0 <<>> %s %s'%(self.args['name'], + self.args['qtype']) + opt="" + if self.args['rd']: + opt=opt+'recurs ' + h=self.header + print ';; options: '+opt + print ';; got answer:' + print ';; ->>HEADER<<- opcode %s, status %s, id %d'%( + h['opcode'],h['status'],h['id']) + flags=filter(lambda x,h=h:h[x],('qr','aa','rd','ra','tc')) + print ';; flags: %s; Ques: %d, Ans: %d, Auth: %d, Addit: %d'%( + string.join(flags),h['qdcount'],h['ancount'],h['nscount'], + h['arcount']) + print ';; QUESTIONS:' + for q in self.questions: + print ';; %s, type = %s, class = %s'%(q['qname'],q['qtypestr'], + q['qclassstr']) + print + print ';; ANSWERS:' + for a in self.answers: + print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'], + a['data']) + print + print ';; AUTHORITY RECORDS:' + for a in self.authority: + print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'], + a['data']) + print + print ';; ADDITIONAL RECORDS:' + for a in self.additional: + print '%-20s %-6s %-6s %s'%(a['name'],`a['ttl']`,a['typename'], + a['data']) + print + if self.args.has_key('elapsed'): + print ';; Total query time: %d msec'%self.args['elapsed'] + print ';; To SERVER: %s'%(self.args['server']) + print ';; WHEN: %s'%time.ctime(time.time()) + + def storeM(self,u): + (self.header['id'], self.header['qr'], self.header['opcode'], + self.header['aa'], self.header['tc'], 
self.header['rd'], + self.header['ra'], self.header['z'], self.header['rcode'], + self.header['qdcount'], self.header['ancount'], + self.header['nscount'], self.header['arcount']) = u.getHeader() + self.header['opcodestr']=DNS.Opcode.opcodestr(self.header['opcode']) + self.header['status']=DNS.Status.statusstr(self.header['rcode']) + for i in range(self.header['qdcount']): + #print 'QUESTION %d:' % i, + self.questions.append(self.storeQ(u)) + for i in range(self.header['ancount']): + #print 'ANSWER %d:' % i, + self.answers.append(self.storeRR(u)) + for i in range(self.header['nscount']): + #print 'AUTHORITY RECORD %d:' % i, + self.authority.append(self.storeRR(u)) + for i in range(self.header['arcount']): + #print 'ADDITIONAL RECORD %d:' % i, + self.additional.append(self.storeRR(u)) + + def storeQ(self,u): + q={} + q['qname'], q['qtype'], q['qclass'] = u.getQuestion() + q['qtypestr']=DNS.Type.typestr(q['qtype']) + q['qclassstr']=DNS.Class.classstr(q['qclass']) + return q + + def storeRR(self,u): + r={} + r['name'],r['type'],r['class'],r['ttl'],r['rdlength'] = u.getRRheader() + r['typename'] = DNS.Type.typestr(r['type']) + r['classstr'] = DNS.Class.classstr(r['class']) + #print 'name=%s, type=%d(%s), class=%d(%s), ttl=%d' \ + # % (name, + # type, typename, + # klass, DNS.Class.classstr(class), + # ttl) + mname = 'get%sdata' % r['typename'] + if hasattr(u, mname): + r['data']=getattr(u, mname)() + else: + r['data']=u.getbytes(rdlength) + return r + +def dumpQ(u): + qname, qtype, qclass = u.getQuestion() + print 'qname=%s, qtype=%d(%s), qclass=%d(%s)' \ + % (qname, + qtype, DNS.Type.typestr(qtype), + qclass, DNS.Class.classstr(qclass)) + +def dumpRR(u): + name, type, klass, ttl, rdlength = u.getRRheader() + typename = DNS.Type.typestr(type) + print 'name=%s, type=%d(%s), class=%d(%s), ttl=%d' \ + % (name, + type, typename, + klass, DNS.Class.classstr(klass), + ttl) + mname = 'get%sdata' % typename + if hasattr(u, mname): + print ' formatted rdata:', getattr(u, 
mname)() + else: + print ' binary rdata:', u.getbytes(rdlength) + diff --git a/DNS/Opcode.py b/DNS/Opcode.py new file mode 100644 index 00000000..f2e7cd5b --- /dev/null +++ b/DNS/Opcode.py @@ -0,0 +1,16 @@ +# Opcode values in message header (section 4.1.1) + +QUERY = 0 +IQUERY = 1 +STATUS = 2 + +# Construct reverse mapping dictionary + +_names = dir() +opcodemap = {} +for _name in _names: + if _name[0] != '_': opcodemap[eval(_name)] = _name + +def opcodestr(opcode): + if opcodemap.has_key(opcode): return opcodemap[opcode] + else: return `opcode` diff --git a/DNS/Status.py b/DNS/Status.py new file mode 100644 index 00000000..8edb0830 --- /dev/null +++ b/DNS/Status.py @@ -0,0 +1,19 @@ +# Status values in message header + +NOERROR = 0 +FORMERR = 1 +SERVFAIL = 2 +NXDOMAIN = 3 +NOTIMP = 4 +REFUSED = 5 + +# Construct reverse mapping dictionary + +_names = dir() +statusmap = {} +for _name in _names: + if _name[0] != '_': statusmap[eval(_name)] = _name + +def statusstr(status): + if statusmap.has_key(status): return statusmap[status] + else: return `status` diff --git a/DNS/Type.py b/DNS/Type.py new file mode 100644 index 00000000..6f019b10 --- /dev/null +++ b/DNS/Type.py @@ -0,0 +1,42 @@ +# TYPE values (section 3.2.2) + +A = 1 # a host address +NS = 2 # an authoritative name server +MD = 3 # a mail destination (Obsolete - use MX) +MF = 4 # a mail forwarder (Obsolete - use MX) +CNAME = 5 # the canonical name for an alias +SOA = 6 # marks the start of a zone of authority +MB = 7 # a mailbox domain name (EXPERIMENTAL) +MG = 8 # a mail group member (EXPERIMENTAL) +MR = 9 # a mail rename domain name (EXPERIMENTAL) +NULL = 10 # a null RR (EXPERIMENTAL) +WKS = 11 # a well known service description +PTR = 12 # a domain name pointer +HINFO = 13 # host information +MINFO = 14 # mailbox or mail list information +MX = 15 # mail exchange +TXT = 16 # text strings +AAAA = 28 # IPv6 AAAA records (RFC 1886) + +# Additional TYPE values from host.c source + +UNAME = 110 +MP = 240 + +# QTYPE 
values (section 3.2.3) + +AXFR = 252 # A request for a transfer of an entire zone +MAILB = 253 # A request for mailbox-related records (MB, MG or MR) +MAILA = 254 # A request for mail agent RRs (Obsolete - see MX) +ANY = 255 # A request for all records + +# Construct reverse mapping dictionary + +_names = dir() +typemap = {} +for _name in _names: + if _name[0] != '_': typemap[eval(_name)] = _name + +def typestr(type): + if typemap.has_key(type): return typemap[type] + else: return `type` diff --git a/DNS/__init__.py b/DNS/__init__.py new file mode 100644 index 00000000..324d581a --- /dev/null +++ b/DNS/__init__.py @@ -0,0 +1,10 @@ +# __init__.py for DNS class. + +Error='DNS API error' +import Type,Opcode,Status,Class +from Base import * +from Lib import * +from lazy import * +Request = DnsRequest +Result = DnsResult + diff --git a/DNS/asyncore.py b/DNS/asyncore.py new file mode 100644 index 00000000..dd4d765d --- /dev/null +++ b/DNS/asyncore.py @@ -0,0 +1,266 @@ +# -*- Mode: Python; tab-width: 4 -*- +# $Id$ +# Author: Sam Rushing + +# A simple unix version of the asynchronous socket support. +# There are lots of problems with this still - I only wrote it to show +# that it could be done, and for my own testing purposes. +# [960206: servtest, asynfing, asynhttp, and pop3demo work, asyndns doesn't.] +# [960321: servtest, asynfing, asynhttp, pop3demo, pop3_2 work] +import select +import socket +import sys + +# you need to generate ERRNO.py from Tools/scripts/h2py.py in the Python +# distribution. + +try: + import ERRNO +except ImportError: + raise ImportError,'you need to generate ERRNO.py from Tools/scripts/h2py.py in the Python distribution' + +# look what I can get away with... 8^) +socket.socket_map = {} + +ALL_EVENTS = [] + +DEFAULT_TIMEOUT = 30.0 + +loop_running = 0 + +stop_loop_exception = "stop running the select loop" + +# we want to select for read only those sockets +# to which we are already connected to, -OR- those +# sockets we are accepting on. 
+def readables (sock_fds): + sm = socket.socket_map + def readable_test (fd, sm=sm): + sock = sm[fd] + return sock.connected or sock.accepting + return filter (readable_test, sock_fds) + +# only those fd's we are 'write blocked' on, -OR- +# those sockets we are waiting for a connection on. +def writables (sock_fds): + sm = socket.socket_map + def writable_test (fd, sm=sm): + sock = sm[fd] + return sock.write_blocked or not sock.connected + return filter (writable_test, sock_fds) + +def loop(timeout=DEFAULT_TIMEOUT): + loop_running = 1 + try: + while 1: + sock_fds = socket.socket_map.keys() + + read_fds = readables (sock_fds) + write_fds = writables (sock_fds) + expt_fds = sock_fds[:] + + (read_fds, + write_fds, + expt_fds) = select.select (read_fds, + write_fds, + expt_fds, + timeout) + print read_fds,write_fds,expt_fds + try: + for x in expt_fds: + socket.socket_map[x].handle_expt_event() + for x in read_fds: + socket.socket_map[x].handle_read_event() + for x in write_fds: + socket.socket_map[x].handle_write_event() + except KeyError: + # handle_expt handle_read might remove as socket + # from the map by calling self.close(). 
+ pass + except stop_loop_exception: + print 'loop stopped' + +class dispatcher: + def __init__ (self, sock=None): + self.debug = 0 + self.log_queue = [] + self.connected = 0 + self.accepting = 0 + self.write_blocked = 1 + if sock: + self.socket = sock + self.fileno = self.socket.fileno() + # I think it should inherit this anyway + self.socket.setblocking (0) + self.connected = 1 + self.add_channel() + + def add_channel (self, events=ALL_EVENTS): + self.log ('adding channel %s' % self) + socket.socket_map [self.fileno] = self + + def del_channel (self): + if socket.socket_map.has_key (self.fileno): + del socket.socket_map [self.fileno] + if not len(socket.socket_map.keys()): + raise stop_loop_exception + + def create_socket (self, family, type): + self.socket = socket.socket (family, type) + self.socket.setblocking(0) + self.fileno = self.socket.fileno() + self.add_channel() + + def bind (self, *args): + return apply (self.socket.bind, args) + + def go (self): + if not loop_running: + loop() + + def listen (self, num): + self.accepting = 1 + self.socket.listen (num) + + def accept (self): + return self.socket.accept() + + def connect (self, host, port): + try: + self.socket.connect (host, port) + except socket.error, why: + if type(why) == type(()) \ + and why[0] in (ERRNO.EINPROGRESS, ERRNO.EALREADY, ERRNO.EWOULDBLOCK): + return + else: + raise socket.error, why + self.connected = 1 + self.handle_connect() + + def send (self, data): + try: + result = self.socket.send (data) + if result != len(data): + self.write_blocked = 1 + else: + self.write_blocked = 0 + return result + except socket.error, why: + if type(why) == type(()) and why[0] == ERRNO.EWOULDBLOCK: + self.write_blocked = 1 + return 0 + else: + raise socket.error, why + return 0 + + def recv (self, buffer_size): + data = self.socket.recv (buffer_size) + if not data: + self.handle_close() + return '' + else: + return data + + def close (self): + self.socket.close() + self.del_channel() + + def shutdown 
(self, how): + self.socket.shutdown (how) + + def log (self, message): + #self.log_queue.append ('%s:%d %s' % + # (self.__class__.__name__, self.fileno, message)) + print 'log:', message + + def done (self): + self.print_log() + + def print_log (self): + for x in self.log_queue: + print x + + def handle_read_event (self): + # getting a read implies that we are connected + if not self.connected: + self.handle_connect() + self.connected = 1 + self.handle_read() + elif self.accepting: + if not self.connected: + self.connected = 1 + self.handle_accept() + else: + self.handle_read() + + def more_to_send (self, yesno=1): + self.write_blocked = yesno + + def handle_write_event (self): + # getting a read implies that we are connected + if not self.connected: + self.handle_connect() + self.connected = 1 + self.write_blocked = 0 + self.handle_write() + + def handle_expt_event (self): + self.handle_error() + + def handle_error (self, error=0): + self.close() + + def handle_read (self): + self.log ('unhandled FD_READ') + + def handle_write (self): + self.log ('unhandled FD_WRITE') + + def handle_connect (self): + self.log ('unhandled FD_CONNECT') + + def handle_oob (self): + self.log ('unhandled FD_OOB') + + def handle_accept (self): + self.log ('unhandled FD_ACCEPT') + + def handle_close (self): + self.log ('unhandled FD_CLOSE') + + def handle_disconnect (self, error): + self.log ('unexpected disconnect, error:%d' % error) + +# --------------------------------------------------------------------------- +# adds async send capability, useful for simple clients. 
+# --------------------------------------------------------------------------- + +class dispatcher_with_send (dispatcher): + def __init__ (self, sock=None): + dispatcher.__init__ (self, sock) + self.out_buffer = '' + + def initiate_send (self): + while self.out_buffer: + num_sent = 0 + num_sent = dispatcher.send (self, self.out_buffer[:512]) + self.out_buffer = self.out_buffer[num_sent:] + + def handle_write (self): + self.initiate_send() + + def send (self, data): + if self.debug: + self.log ('sending %s' % repr(data)) + self.out_buffer = data + self.initiate_send() + +# --------------------------------------------------------------------------- +# used a lot when debugging +# --------------------------------------------------------------------------- + +def close_all (): + for x in socket.socket_map.items(): + x[1].socket.close() + socket.socket_map = {} + diff --git a/DNS/lazy.py b/DNS/lazy.py new file mode 100644 index 00000000..49e31e05 --- /dev/null +++ b/DNS/lazy.py @@ -0,0 +1,24 @@ +# $Id$ +# routines for lazy people. +import Base + +def revlookup(name): + "convenience routine for doing a reverse lookup of an address" + import string + a = string.split(name, '.') + a.reverse() + b = string.join(a, '.')+'.in-addr.arpa' + # this will only return one of any records returned. + return Base.DnsRequest(b, qtype = 'ptr').req().answers[0]['data'] + +def mxlookup(name): + """ + convenience routine for doing an MX lookup of a name. 
returns a + sorted list of (preference, mail exchanger) records + """ + + a = Base.DnsRequest(name, qtype = 'mx').req().answers + l = map(lambda x:x['data'], a) + l.sort() + return l + diff --git a/GML/GMLLexer.py b/GML/GMLLexer.py new file mode 100644 index 00000000..39fc57bf --- /dev/null +++ b/GML/GMLLexer.py @@ -0,0 +1,43 @@ +import sys,re +import PyLR + +def _intfunc(m): + return int(m.group(0)) + +def _realfunc(m): + return float(m.group(0)) + +class GMLLexer(PyLR.Lexer): + """The GML lexical scanner.""" + def __init__(self): + PyLR.Lexer.__init__(self) + self.addpat(r"[-+]?(\d+\.\d*|\d*\.\d+)([Ee][-+]?\d+)?", + "REAL", _realfunc) + self.addpat(r"[-+]?\d+", "INT", _intfunc) + self.addpat(r"\[", "LSQB") + self.addpat(r"\]", "RSQB") + self.addpat(r'"([^&"]+|&[a-zA-Z]+;)*"', "STRING") + self.addpat(r"[a-zA-Z][a-zA-Z0-9]*", "KEY") + self.addpat(r"#[^\n]*", "", None, PyLR.SKIPTOK) + self.addpat(r"\s+", "", None, PyLR.SKIPTOK) + +def _test(): + gmltest = """# a graph example + graph [ # comment at end of line + node [ + real1 1.e3 + real2 .01 + int1 00050 + label "Wallerfang&Ballern" + ] + ] + """ + # create the lexer + lexer = GMLLexer() + lexer.settext(gmltest) + tok=1 + while tok: + tok, val = lexer.scan(1) + +if __name__ == '__main__': + _test() diff --git a/GML/grammarspec.txt b/GML/grammarspec.txt new file mode 100644 index 00000000..5c995a21 --- /dev/null +++ b/GML/grammarspec.txt @@ -0,0 +1,45 @@ +# a GML parser +# Here is the GML grammar +# corrected from me because the original at +# http://www.uni-passau.de/Graphlet/GML had some errors +# +# corrections are +# (1) use instring* in string +# (2) add character,lowercase,uppercase definitions +# (3) skip whitespace definition, this is obvious +# (4) use digit+ in mantissa +# (5) either intpart or fraction of a real must contain a number +# (6) comments can be on a separate or at the end of the line +# +# gml: list +# list: (whitespace* key whitespace+ value)* +# value: integer | real | string | "[" list "]" 
+# key: character (character | digit)* +# integer: sign digit+ +# real: sign (digit+ "." digit* | digit* "." digit+) mantissa +# string: """ instring* """ +# sign: "+" | "-" | +# digit: "0"..."9" +# character: lowercase | uppercase +# lowercase: "a"..."z" +# uppercase: "A"..."Z" +# mantissa: ("E"|"e") sign digit+ | +# instring: | "&" character+ ";" +# +# Note that integers and reals can have prefixed zeros, e.g. 001 is 1 + +_class GMLParser +_code import GMLLexer +_lex GMLLexer.GMLLexer() + +# manually reduced +""" +list: list KEY value (key_value) | + (endoflist) ; +value: INTEGER | + REAL | + STRING | + LSQB list RSQB (beginlist) ; +""" + + diff --git a/INSTALL b/INSTALL new file mode 100644 index 00000000..3800a3b6 --- /dev/null +++ b/INSTALL @@ -0,0 +1,28 @@ + LinkChecker installation + ========================== + +First, decompress the archive. +With linkchecker-x.x.x.tar.bz2 do "tar xIvf linkchecker-x.x.x.tar.bz2". +With linkchecker-x.x.x.zip do "unzip linkchecker-x.x.x.zip" or use Winzip. +With linkchecker-x.x.x.deb do "dpkg -i linkchecker-x.x.x.deb" as root and you +are done. + +Unix Users: +1. Edit the file linkchecker. + Adjust the argument to sys.path.append to point to the distribution + directory. +2. Copy linkchecker to a location in your PATH (or make a symlink). +3. Check links happily by typing `linkchecker`. + +Windows Users: +1. Edit the file linkchecker. + Adjust the argument to sys.path.append to point to the distribution + directory. +2. Edit the file linkchecker.bat. + a) Adjust the PYHTON variable to point to python.exe. + b) Adjust the LINKCHECKER variable to point to the distribution directory. +3. Add the distribution directory to your PATH. +4. Check links happily by typing `linkchecker.bat`. 
+ +You need Python >= 1.5.2 +You get Python from http://www.python.org diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..e77696ae --- /dev/null +++ b/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..7eafc515 --- /dev/null +++ b/Makefile @@ -0,0 +1,53 @@ +VERSION=0.9.0 +HOST=treasure.calvinsplayground.de +#HOST=fsinfo.cs.uni-sb.de +PACKAGE = linkchecker +BZ2PACKAGE = $(PACKAGE)-$(VERSION).tar.bz2 +DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb +ZIPPACKAGE = $(PACKAGE)-$(VERSION).zip +ALLPACKAGES = ../$(BZ2PACKAGE) ../$(DEBPACKAGE) ../$(ZIPPACKAGE) +.PHONY: test clean files install all +TAR = tar +ZIP = zip +prefix = /usr/local + +all: + +clean: + rm -f $(ALLPACKAGES) $(PACKAGE)-out.* + +files: all + ./$(PACKAGE) -q -Wtext -Whtml -Wgml -Wsql -R -r2 -v -i "$(HOST)" http://$(HOST)/~calvin/ + +install: install-dirs + install -m644 linkcheck/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/linkcheck + install -m644 DNS/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/DNS + install -m644 *.py? $(DESTDIR)/usr/share/$(PACKAGE) + install -m755 $(PACKAGE) $(DESTDIR)/usr/bin + install -m644 $(PACKAGE)rc $(DESTDIR)/etc + +install-dirs: + install -d -m755 \ + $(DESTDIR)/usr/share/$(PACKAGE)/linkcheck \ + $(DESTDIR)/usr/share/$(PACKAGE)/DNS \ + $(DESTDIR)/usr/share/$(PACKAGE)/GML \ + $(DESTDIR)/usr/share/$(PACKAGE)/PyLR \ + $(DESTDIR)/usr/bin \ + $(DESTDIR)/etc + +dist: files + dh_clean + cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE) + cd .. && $(ZIP) -r $(ZIPPACKAGE) $(PACKAGE) + fakeroot debian/rules binary + +package: + cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE) + +test: + rm -f test/*.result + @for i in test/*.html; do \ + echo "Testing $$i. Results are in $$i.result"; \ + ./$(PACKAGE) -v -a $$i > $$i.result 2>&1; \ + done + diff --git a/PyLR/Grammar.py b/PyLR/Grammar.py new file mode 100644 index 00000000..3d8d1fa8 --- /dev/null +++ b/PyLR/Grammar.py @@ -0,0 +1,853 @@ +__version__ = "$Id$" + +import time,string,types,parsertemplate + +class PyLRParseError(ParseError): + pass + +class Production: + """Production -- a Grammar is really just a list of productions. 
+ The expected structure is a symbol for the LHS and a list of + symbols or symbols for the RHS.""" + def __init__(self, LHS, RHS, funcname="unspecified"): + self.LHS = LHS + self.RHS = RHS + self.funcname = funcname + self.func = None # will be assigned dynamically + self.toklist = None + + def setfunc(self, func): + """.setfunc() --used for the dynamic production + of a parseengine directly from Grammar.mkengine(), instead of tables + saved to a file.""" + self.func = func + + def setfuncname(self, name): + """.setfuncname("") -- used by Grammar.writefile to produce + prodinfo table that. .setfunc associates a function value + with the production for runtime, on the fly productions + of parsing engine from Grammar.""" + self.funcname = name + + def __len__(self): + return len(self.RHS) + + def __repr__(self): + return self.getrep() + + def getrep(self, toklist=None): + s = self.LHS+":" + for t in self.RHS: + if type(t)==types.IntType and toklist: + s = s+" "+toklist[t] + else: + s = s+" "+str(t) + if self.funcname: s = s+" ("+self.funcname+")" + return s + + def items(self): + return range(len(self.RHS) + 1) + + +class LR1Grammar: + """Provides methods for producing the actiontable, the gototable, and the + prodinfo table. Using these functions, it can produce a python source + code file with these tables or a parsing engine. 
+ Note that we assume the first production (productions[0]) to be the start + symbol.""" + + EPS = "" + EOF = "" + DummyLA = -1 + + def __init__(self, productions, tokens=[], verbose=0): + self.verbose = verbose + self.productions = productions + self.tokens = tokens + self.nonterminals = [] + for p in self.productions: + if p.LHS not in self.nonterminals: + self.nonterminals.append(p.LHS) + if self.verbose: + print "Nonterminals:", self.nonterminals + self.terminals = [] + for p in self.productions: + for s in p.RHS: + if not (s in self.terminals or s in self.nonterminals): + self.terminals.append(s) + self.terminals.sort() + if self.verbose: + print "Terminals:", self.terminals + # reduce the grammar + self._reduceGrammar() + # build map with productions who have the same LHS + self.lhsprods = {} + for lhs in self.nonterminals: + self.lhsprods[lhs] = filter(lambda x,l=lhs: x.LHS==l, self.productions) + # immediate epsilon productions + pi = 1 + self.epslhs = {} + for p in self.productions: + if p.RHS == []: + self.epslhs[p.LHS] = pi + pi = pi + 1 + # derived epsilon productions + self.lhsdereps = self._mklhsdereps() + # the FIRST function for the LR(1) grammar, implemented as a map + self.firstmap = self._mkfirstmap() + + def _reduceGrammar(self): + """Definitions: + (1) not productive + a nonterminal A is not productive iff there is no + word u with A ==>* u + This means A produces no words in the grammar. + (2) not reachable + a nonterminal A is no reachable iff there are no words + a,b with S ==>* aAb + This means A occurs never in a parsetree if we derive a word. + + This function eliminates all nonterminals which are not productive + or not reachable. + If we reduce the start symbol, the grammar produces nothing and + a ParseException is thrown. + + References: [R. Wilhelm, D.Maurer: "Ubersetzerbau, p. 
300f] + """ + # productive nonterminals + productive_nts = [] + # rest_nt[p] == the number of nonterminals in p.RHS which are not yet + # marked as productive + # if rest_nt[p]==0 then p is productive + rest_nt = {} + # if we find a productive nonterminal A, we have to inspect all + # other nonterminals with A. this is the reason we add all found + # productive nts to this list + workedon_nts = [] + # mark terminals as productive (even epsilon-prductions) + for p in self.productions: + rest_nt[p]= len(filter(lambda x, s=self: x in s.nonterminals, p.RHS)) + if rest_nt[p]==0: + productive_nts[p] = 1 + workedon_nts.append(p) + # work on the productive list + while len(workedon_nts): + x = workedon_nts[0] + # search for production p with x in p.RHS + for p in filter(lambda p, _x=x: _x in p.RHS, self.productions): + rest_nt[p] = rest_nt[p] - 1 + if not p.LHS in productive_nts: + productive_nts.append(p.LHS) + workedon_nts.append(p.LHS) + workedon_nts.remove(x) + if not self.productions[0].LHS in productive_nts: + raise PyLRParseError, "start symbol of grammar is not productive" + + # reachable nonterminals + reachable_nts = self.productions[0] + added=1 + while added: + added = 0 + for p in self.productions: + for r in p.RHS: + if p.LHS in reachable_nts and (r in self.nonterminals and + r not in reachable_nts): + reachable_nts.append(r) + added = 1 + + # reduce the grammar + self.productions = filter(lambda p, + pnt=productive_nts, + rnt=reachable_nts: p.LHS in pnt or p.LHS in rnt, + self.productions) + + def __repr__(self): + """I like functional programming :)""" + return string.join(map(lambda x,s=self: x.getrep(s.tokens), + self.productions),";\n")+";" + + def _mklhsdereps(self): + """determines the nonterminals that derive nothing (epsilon)""" + pi = 1 + res = {} + for p in self.productions: + if p.RHS == []: + res[p.LHS] = pi + pi = pi + 1 + workingnonterms = [] + for nt in self.nonterminals: + if not res.has_key(nt): + workingnonterms.append(nt) + while 1: + 
toremove = [] + for nt in workingnonterms: + if not res.has_key(nt): + for p in self.lhsprods[nt]: + if len(p.RHS) == 1 and res.has_key(p.RHS[0]): + res[p.LHS] = res[p.RHS[0]] + toremove.append(nt) + break + if not toremove: + break + for r in toremove: + workingnonterms.remove(r) + return res + + + def _mkfirstmap(self): + """return a dictionary keyed by symbol whose values are the set + of terminals that can precede that symbol + """ + res = {} + for sym in self.terminals+[Grammar.EPS, Grammar.EOF, Grammar.DummyLA]: + res[sym] = {sym: 1} + added=1 + while added: + added = 0 + for nt in self.nonterminals: + firsts = res.get(nt, {}) + for p in self.lhsprods[nt]: + if not p.RHS: + if not firsts.has_key(Grammar.EPS): + added = firsts[Grammar.EPS] = 1 + for i in range(len(p.RHS)): + f = res.get(p.RHS[i], {}) + for t in f.keys(): + if not firsts.has_key(t): + added = firsts[t] = 1 + if not self.lhsdereps.has_key(p.RHS[i]): + break + res[nt] = firsts + for s in res.keys(): + res[s] = res[s].keys() + return res + + + # these function are used as the grammar produces the tables (or writes + # them to a file) + def firstofstring(self, gs_list): + tmpres = {} + allhaveeps = 1 + for x in range(len(gs_list)): + tmp = self.firstmap[gs_list[x]] + for s in tmp: + tmpres[s] = 1 + if Grammar.EPS in tmp: + del tmpres[Grammar.EPS] + else: + allhaveeps = 0 + break + if allhaveeps: + tmpres[Grammar.EPS] = 1 + return tmpres.keys() + + + + def augment(self): + """this function adds a production S' -> S to the grammar where S was + the start symbol. 
+ """ + lhss = map(lambda x: x.LHS, self.productions) + newsym = self.productions[0].LHS + while 1: + newsym = newsym + "'" + if newsym not in lhss: + break + self.productions.insert(0, Production(newsym, + [self.productions[0].LHS])) + + + # follow is not used yet, but probably will be in determining error reporting/recovery + def follow(self): + eof = Grammar.EOF + follow = {} + startsym = self.productions[0].LHS + follow[startsym] = [eof] + nts = self.nonterminals + for p in self.productions: + cutoff = range(len(p.RHS)) + cutoff.reverse() + for c in cutoff[:-1]: # all but the first of the RHS elements + f = self.firstmap[p.RHS[c]] + if Grammar.EPS in f: + f.remove(Grammar.EPS) + if follow.has_key(p.RHS[c - 1]): + if p.RHS[c -1] in nts: + follow[p.RHS[c -1]] = follow[p.RHS[c - 1]] + f[:] + else: + if p.RHS[c -1] in nts: + follow[p.RHS[c - 1]] = f[:] + for p in self.productions: + if not p.RHS: continue + cutoff = range(len(p.RHS)) + cutoff.reverse() + if p.RHS[-1] in nts: + if follow.has_key(p.LHS): + add = follow[p.LHS] + else: + add = [] + + if follow.has_key(p.RHS[-1]): + follow[p.RHS[-1]] = follow[p.RHS[-1]] + add + else: + follow[p.RHS[-1]] = add + for c in cutoff[:-1]: + f = self.firstmap[p.RHS[c]] + if Grammar.EPS in f: + if follow.has_key(p.LHS): + add = follow[p.LHS] + else: + add = [] + if follow.has_key(p.RHS[c-1]): + follow[p.RHS[c-1]] = follow[p.RHS[c-1]] + add + elif add: + follow[p.RHS[c - 1]] = add + for k in follow.keys(): + d = {} + for i in follow[k]: + d[i] = 1 + follow[k] = d.keys() + return follow + + def closure(self, items): + res = items[:] + todo = items[:] + more = 1 + while more: + more = [] + for (prodind, rhsind), term in todo: + if rhsind >= len(self.productions[prodind].RHS): + continue + for p in self.lhsprods.get(self.productions[prodind].RHS[rhsind], []): + try: + newpart = self.productions[prodind].RHS[rhsind + 1] + except IndexError: + newpart = Grammar.EPS + stringofsyms = [newpart, term] + for t in 
self.firstofstring(stringofsyms): + if ((self.productions.index(p), 0), t) not in res: + more.append(((self.productions.index(p), 0), t)) + if term == Grammar.EOF and newpart == Grammar.EPS: + if ((self.productions.index(p), 0), Grammar.EOF) not in res: + more.append(((self.productions.index(p), 0), Grammar.EOF)) + if more: + res = res + more + todo = more + return res + + def goto(self, items, sym): + itemset = [] + for (prodind, rhsind), term in items: + try: + if self.productions[prodind].RHS[rhsind] == sym and ((prodind, rhsind+1), term) not in itemset: + itemset.append( ((prodind, rhsind +1), term)) + except IndexError: + pass + return self.closure(itemset) + + def default_prodfunc(self): + """for mkengine, this will produce a default function for those + unspecified + """ + return lambda *args: args[0] + + def prodinfotable(self): + """returns a list of three pieces of info for each production. + The first is the lenght of the production, the second is the + function(name) associated with the production and the third is + is the index of the lhs in a list of nonterminals. + """ + res = [] + for p in self.productions: + lhsind = self.nonterminals.index(p.LHS) + func = p.func + if not func: + func = self.default_prodfunc() + plen = len(p.RHS) + if p.RHS == [Grammar.EPS]: + plen = 0 + res.append((plen, func, lhsind)) + return res + + +class LALRGrammar(LR1Grammar): + def __init__(self, prods, toks=[]): + Grammar.__init__(self, prods, toks) + self.LALRitems = [] + # + # this is to help mak epsilon productions work with kernel items + # and to compute goto transitions from kernel + print "computing ntfirsts..." + self.ntfirstmap = self._mkntfirstmap() + # + # this is to help make shifts work with only kernel items + print "computing tfirsts..." + self.tfirstmap = self._mktfirstmap() + # + # another thing to help epsilon productions + print "computing follows..." 
+ self.followmap = self.follow() + + def _mkntfirstmap(self): + """computes all nonterms A, first of (strings n) such that some + nonterminal B derives [A, n] in zero or more steps of (rightmost) + derivation. used to help make epsilon productions quickly calculable. + (B may == A) + """ + res = {} + for p in self.productions: + if p.RHS and p.RHS[0] in self.nonterminals: + fos = self.firstofstring(p.RHS[1:]) + fos.sort() + if not res.has_key(p.LHS): + res[p.LHS] = {} + if not res[p.LHS].has_key(p.RHS[0]): + res[p.LHS][p.RHS[0]] = [] + for i in fos: + if i not in res[p.LHS].get(p.RHS[0], []): + res[p.LHS][p.RHS[0]] = fos + + while 1: + foundmore = 0 + reskeys = res.keys() + for nt in reskeys: + rhsdict = res[nt] + for rnt in rhsdict.keys(): + if rnt in reskeys: + d = res[rnt] + for k in d.keys(): + if not res[nt].has_key(k): + fos = self.firstofstring(d[k]+ res[nt][rnt]) + foundmore = 1 + fos.sort() + res[nt][k] = fos + else: + fos = self.firstofstring(d[k] + res[nt][rnt]) + fos.sort() + if fos != res[nt][k]: # then res[nt][k] is contained in fos + foundmore = 1 + res[nt][k] = fos + if not foundmore: + break + # + # this part accounts for the fact that a nonterminal will + # produce exactly itself in zero steps + # + for p in self.productions: + if res.has_key(p.LHS): + res[p.LHS][p.LHS] = [Grammar.EPS] + else: + res[p.LHS] = {p.LHS: [Grammar.EPS]} + return res + + def newmkntfirstmap(self): + """computes all nonterms A, first of (strings n) such that some + nonterminal B derives [A, n] in zero or more steps of (rightmost) + derivation. used to help make epsilon productions quickly calculable. 
+ (B may == A) + """ + res = {} + pi = 0 + for p in self.productions: + if p.RHS and p.RHS[0] in self.nonterminals: + if not res.has_key(p.LHS): + res[p.LHS] = {} + if not res[p.LHS].has_key(p.RHS[0]): + res[p.LHS][p.RHS[0]] = 1 + while 1: + foundmore = 0 + reskeys = res.keys() + for nt in reskeys: + rhsdict = res[nt] + for rnt in rhsdict.keys(): + if rnt in reskeys: + d = res[rnt] + for k in d.keys(): + if not res[nt].has_key(k): + foundmore = 1 + res[nt][k] = 1 + if not foundmore: + break + # + # this part accounts for the fact that a nonterminal will + # produce exactly itself in zero steps + # + for p in self.productions: + if res.has_key(p.LHS): + res[p.LHS][p.LHS] = 1 + else: + res[p.LHS] = {p.LHS: 1} + return res + + + + def _mktfirstmap(self): + """for each nonterminal C, compute the set of all terminals a, such + that C derives ax in zero or more steps of (rightmost) derivation + where the last derivation is not an epsilon (empty) production. + + assumes .mkfirstntmap() has been run and has already produced + self.ntfirstmap + """ + res = {} + for p in self.productions: + if not res.has_key(p.LHS): + res[p.LHS] = [] + if p.RHS and p.RHS[0] in self.terminals: + res[p.LHS].append(p.RHS[0]) + while 1: + foundmore = 0 + reskeys = res.keys() + for nt in self.ntfirstmap.keys(): + arrows = self.ntfirstmap[nt] + for k in arrows.keys(): + for t in res[k]: + if t not in res[nt]: + foundmore = 1 + res[nt].append(t) + if not foundmore: + break + return res + + def goto(self, itemset, sym): + res = [] + for (pi, ri) in itemset: + if ri == len(self.productions[pi].RHS): + continue + s = self.productions[pi].RHS[ri] + if s == sym: + res.append((pi, ri+1)) + d = self.ntfirstmap.get(s, {}) + for k in d.keys(): + for p in self.lhsprods[k]: + if p.RHS and p.RHS[0] == sym: + i = self.productions.index(p) + if (i, 1) not in res: res.append((i, 1)) + res.sort() + return res + + def lookaheads(self, itemset): + setsofitems = kernels = self.kernelitems + spontaneous = [] + 
propagates = {} + gotomap = {} + for (kpi, kri) in itemset: + C = self.closure([((kpi, kri), Grammar.DummyLA)]) + for (cpi, cri), t in C: + if (cri) == len(self.productions[cpi].RHS): + continue + s = self.productions[cpi].RHS[cri] + if gotomap.has_key(s): + newstate = gotomap[s] + else: + newstate = setsofitems.index(self.goto(itemset, s)) + gotomap[s] = newstate + if t != Grammar.DummyLA: + spontaneous.append((newstate, (cpi, cri+1), t)) + else: + if propagates.has_key((kpi, kri)): + propagates[(kpi, kri)].append((newstate, (cpi, cri+1))) + else: + propagates[(kpi, kri)]=[(newstate, (cpi, cri+1))] + return spontaneous, propagates + + def kernelsoflalr1items(self): + res = [[(0, 0)]] + todo = [[(0, 0)]] + while 1: + newtodo = [] + for items in todo: + for s in self.terminals + self.nonterminals + [Grammar.EOF]: + g = self.goto(items, s) + if g and g not in res: + newtodo.append(g) + if not newtodo: + break + else: + if self.verbose: + print "found %d more kernels" % (len(newtodo)) + res = res + newtodo + todo = newtodo + res.sort() + return res + + def initLALR1items(self): + self.kernelitems = kernels = self.kernelsoflalr1items() + props = {} + la_table = [] + for x in range(len(kernels)): + la_table.append([]) + for y in range(len(kernels[x])): + la_table[x].append([]) + la_table[0][0] = [Grammar.EOF] + if self.verbose: + print "initLALR1items, kernels done, calculating propagations and spontaneous lookaheads" + state_i = 0 + for itemset in kernels: + if self.verbose: + print ".", + sp, pr = self.lookaheads(itemset) + for ns, (pi, ri), t in sp: + inner = kernels[ns].index((pi, ri)) + la_table[ns][inner].append(t) + props[state_i] = pr + state_i = state_i + 1 + return la_table, props + + def LALR1items(self): + la_table, props = self.initLALR1items() + if self.verbose: + print "done init LALR1items" + soi = self.kernelitems + while 1: + added_la = 0 + state_i = 0 + for state in la_table: + ii = 0 + for propterms in state: + if not propterms: + ii = ii + 1 + 
continue + item = soi[state_i][ii] + ii = ii + 1 + try: + proplist = props[state_i][item] + except KeyError: + continue + for pstate, pitem in proplist: + inner = soi[pstate].index(pitem) + for pt in propterms: + if pt not in la_table[pstate][inner]: + added_la = 1 + la_table[pstate][inner].append(pt) + state_i = state_i + 1 + if not added_la: + break + # + # this section just reorganizes the above data + # to the state it's used in later... + # + if self.verbose: + print "done with lalr1items, reorganizing the data" + res = [] + state_i = 0 + for state in soi: + item_i = 0 + inner = [] + for item in state: + for term in la_table[state_i][item_i]: + if (item, term) not in inner: + inner.append((item, term)) + item_i = item_i + 1 + inner.sort() + res.append(inner) + state_i = state_i + 1 + self.LALRitems = res + return res + + def deriveN(self, nt1, nt2): + """ + assuming nt1 -> nt2 , what is ? such that + we know it as 1) a set of terminals and 2) whether it contains + Grammar.EPS + """ + pass + + def actiontable(self): + items = self.LALRitems + res = [] + state_i = 0 + terms = self.terminals[:] + terms.append(Grammar.EOF) + errentry = ("", -1) + for state in items: + list = [errentry] * len(terms) + res.append(list) + for (prodind, rhsind), term in state: + if (rhsind ) == len(self.productions[prodind].RHS): + if prodind != 0: + new = ("r", prodind) + old = res[state_i][terms.index(term)] + if old != errentry and old != new: + print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new + res[state_i][terms.index(term)] = new + else: + new = ("a", -1) + old = res[state_i][terms.index(term)] + if old != errentry and old != new: + print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new + res[state_i][terms.index(term)] = new + # + # calculate reduction by epsilon productions + # + elif self.productions[prodind].RHS[rhsind] in self.nonterminals: + nt = self.productions[prodind].RHS[rhsind] + ntfirst = self.firstmap[nt] + ntfirsts = 
self.ntfirstmap.get(nt, {}) + for k in ntfirsts.keys(): + if self.epslhs.get(k, ""): + reduceterms = self.followmap[k] +# print `((prodind, rhsind), term)`, reduceterms + for r in reduceterms: + inner = terms.index(r) + old = res[state_i][inner] + new = ("r", self.epslhs[k]) + if old != errentry and old != new: + print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new + res[state_i][inner] = new + # + # calculate the shifts that occur but whose normal items aren't in the kernel + # + tfirsts = self.tfirstmap[nt] + for t in tfirsts: + inner = terms.index(t) + g = self.goto(self.kernelitems[state_i], t) + old = res[state_i][inner] + try: + news = self.kernelitems.index(g) + except ValueError: + continue + new = ("s", news) + if old != errentry and old != new: + print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new + res[state_i][inner] = new + # + # compute the rest of the shifts that occur 'normally' in the kernel + # + else: + t = self.productions[prodind].RHS[rhsind] + inner = self.terminals.index(t) + gt = self.goto(self.kernelitems[state_i], t) + if gt in self.kernelitems: + news = self.kernelitems.index(gt) + old = res[state_i][inner] + new = ("s", news) + if old != errentry and old != new: + print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new + res[state_i][inner] = new + state_i = state_i + 1 + return res + + def gototable(self): + items = self.kernelitems + res = [] + state_i = 0 + nonterms = self.nonterminals + err = None + for state in items: + list = [err] * len(nonterms) + res.append(list) + nonterm_i = 0 + for nt in nonterms: + goto = self.goto(state, nt) + if goto in items: + res[state_i][nonterm_i] = items.index(goto) + nonterm_i = nonterm_i + 1 + state_i = state_i + 1 + return res + + def mkengine(self, inbufchunksize=None, stackchunksize=None): + """dynamically will produde a parse engine, just an experiment, + don't try to use it for anything real. 
+ """ + self.augment() + self.LALR1items() + at = self.actiontable() + gt = self.gototable() + self.productions = self.productions[1:] # unaugment + pi = self.prodinfotable() + if not inbufchunksize: + inbufchunksize = 50 + if not stackchunksize: + stackchunksize = 100 + e = PyLRengine.NewEngine(pi, at, gt, inbufchunksize, stackchunksize) + return e + + def writefile(self, filename, parsername="MyParser", lexerinit = "PyLR.Lexer.Lexer()"): + self.augment() + print "About to start LALRitems at %d" % time.time() + self.LALR1items() + print "done building LALRitems at %d" % time.time() + at = self.actiontable() + print "done building actiontable at %d" % time.time() + gt = self.gototable() + print "done building gototable at %d" % time.time() + self.productions = self.productions[1:] + pi = self.prodinfotable() + template = parsertemplate.__doc__ + vals = {"parsername": parsername, "lexerinit": lexerinit} + vals["date"] = time.ctime(time.time()) + vals["filename"] = filename + if not hasattr(self, "extrasource"): + vals["extrasource"] = "" + else: + vals["extrasource"] = self.extrasource + vals["grammar"] = `self` + actiontable_s = "[\n\t" + for l in at: + actiontable_s = "%s%s,\n\t" % (actiontable_s, `l`) + vals["actiontable"] = actiontable_s[:-3] + "\n]\n\n" + gototable_s = "[\n\t" + for l in gt: + gototable_s = "%s%s,\n\t" % (gototable_s, `l`) + vals["gototable"] = gototable_s[:-3] + "\n]\n\n" + pi_s = "[\n\t" + pii = 0 + vals["symbols"] = `self.tokens` + prod2func_s = "Production" + " " * 45 + "Method Name\n" + for l, f, e in pi: + pi_s = "%s(%d, '%s', %d),%s# %s\n\t" % (pi_s, + l, + self.productions[pii].funcname, + e, + " " * (18 - len(self.productions[pii].funcname)), + `self.productions[pii]` ) + pii = pii + 1 + vals["prodinfo"] = pi_s + "]\n\n" + fp = open(filename, "w") + fp.write(template % vals) + fp.close() + + +def _makeprod(x): + if len(x)==3: return Production(x[0],x[1],x[2]) + if len(x)==2: return Production(x[0],x[1]) + raise AttributeError, "Invalid 
Production initializer" + +def _bootstrap(): + # dang, how did Scott bootstrap the GrammarParser?? + # have to make this by hand + import Lexers + + # define the productions + toks = Lexers.GrammarLex().getTokenList() + prods = map(_makeprod, + [("pspec", ["gspec"]), + ("pspec", ["pydefs", "gspec"]), + ("gspec", [toks.index("GDEL"), "lhsdeflist", toks.index("GDEL")]), + ("pydefs", ["pydefs", "pydef"]), + ("pydefs", ["pydef"]), + ("pydef", [toks.index("LEX")], "lexdef"), + ("pydef", [toks.index("CODE")], "addcode"), + ("pydef", [toks.index("CLASS")], "classname"), + ("lhsdeflist", ["lhsdeflist", "lhsdef"]), + ("lhsdeflist", ["lhsdef"]), + ("lhsdef", [toks.index("ID"), toks.index("COLON"), "rhslist", toks.index("SCOLON")], "lhsdef"), + ("rhslist", ["rhs"], "singletolist"), + ("rhslist", ["rhslist", toks.index("OR"), "rhs"], "rhslist_OR_rhs"), + ("rhs", ["rhsidlist"], "rhs_idlist"), + ("rhs", ["rhsidlist", toks.index("LPAREN"), toks.index("ID"), toks.index("RPAREN")], "rhs_idlist_func"), + ("rhsidlist", ["idlist"]), + ("rhsidlist", [], "rhseps"), + ("idlist", ["idlist", toks.index("ID")], "idl_idlistID"), + ("idlist", [toks.index("ID")], "idlistID")]) + print string.join(map(lambda x: str(x), prods), "\n") + g = LALRGrammar(prods, toks) + +# g.extrasources = "import PyLR.Parsers" + # produce the parser + g.writefile("./Parsers/GrammarParser.py", "GrammarParser", "PyLR.Lexers.GrammarLex()") + +if __name__=='__main__': + _bootstrap() + diff --git a/PyLR/Lexer.py b/PyLR/Lexer.py new file mode 100644 index 00000000..4971e467 --- /dev/null +++ b/PyLR/Lexer.py @@ -0,0 +1,77 @@ +import re, string, StringUtil + +__version__ = "$Id$" + +class PyLRSyntaxError(SyntaxError): + pass + +SKIPTOK = 0x01 # don't consider this a token that is to be considered a part of the grammar, like '\n' + +class Lexer: + """ + This is a lexer class for PyLR. 
+ + Upon matching text, it must execute a function which will cause it + to return a 2-tuple of type (tok, val) where token is an integer and + val is just any python object that will later be passed as an argument + to the functions that the parser will call when it reduces. For Example + + for the grammar + + E-> E + T + E -> T + T -> T * F + T -> F + F ->( E ) + F -> id + + it is likely that the lexer should return the token value of id and + the integer value of id (string.atoi(id)). + + In addition, the lexer must always return (eof, something else) when it's done + scanning to get the parser to continue to be called until parsing is done. + """ + def __init__(self): + self.toklist = [("EOF", None, None, 0)] + self.settext("") + + def settext(self, t): + self.text = t + self.rewind() + + def getTokenList(self): + """return list of token names""" + return map(lambda x: x[0], self.toklist) + + def rewind(self): + self.textindex = 0 + + def addpat(self, pat, tokname=None, func=None, flags=0): + """add search pattern to the lexer""" + self.toklist.append((tokname, re.compile(pat), func, flags)) + + def __str__(self): + return string.join(map(lambda x: str(x[0])+": "+str(x[1]), self.toklist), "\n") + + def scan(self, verbose=0): + if self.textindex >= len(self.text): + if verbose: print "EOF" + return (0, "EOF") + for i in range(1,len(self.toklist)): + tok = self.toklist[i] + mo = tok[1].match(self.text, self.textindex) + if mo is None: # could be the empty string + continue + self.textindex = self.textindex + len(mo.group(0)) + if tok[3] & SKIPTOK: + return self.scan(verbose) + else: + if tok[2]: + val = apply(tok[2], (mo,)) + else: + val = mo.group(0) + if verbose: print str(i)+", "+str(val) + return (i, val) + raise PyLRSyntaxError, "line "+\ + `StringUtil.getLineNumber(self.text, self.textindex)`+\ + ", near \""+self.text[self.textindex:self.textindex + 10]+"\"" diff --git a/PyLR/Lexers/GrammarLex.py b/PyLR/Lexers/GrammarLex.py new file mode 100644 index 
00000000..c5bfcd70 --- /dev/null +++ b/PyLR/Lexers/GrammarLex.py @@ -0,0 +1,31 @@ +""" +this file contains the Lexer that is used in parsing Grammar specifications +""" + +import re,Lexer + +def retlex(mo): + return mo.group("lex") + +def retcode(mo): + return mo.group("code") + +def retclass(mo): + return mo.group("class") + +class GrammarLex(Lexer.Lexer): + def __init__(self): + Lexer.Lexer.__init__(self) + self.addpat(r"_lex\s+(?P[^\n]*)", "LEX", retlex) + self.addpat(r"_code\s+(?P[^\n]*)", "CODE", retcode) + self.addpat(r"_class\s+(?P[a-zA-Z_][a-zA-Z_0-9]*)", "CLASS", retclass) + self.addpat(r"[a-zA-Z_][a-zA-Z_0-9]*", "ID") + self.addpat(r":", "COLON") + self.addpat(r";", "SCOLON") + self.addpat(r"\|", "OR") + self.addpat(r"\(", "LPAREN") + self.addpat(r"\)", "RPAREN") + self.addpat(r'"""', "GDEL") + self.addpat(r"\s*#[^\n]*", "", None, Lexer.SKIPTOK) + self.addpat(r"\s+", "", None, Lexer.SKIPTOK) + diff --git a/PyLR/Lexers/__init__.py b/PyLR/Lexers/__init__.py new file mode 100644 index 00000000..d831cdfd --- /dev/null +++ b/PyLR/Lexers/__init__.py @@ -0,0 +1,5 @@ + +from GrammarLex import GrammarLex + + + diff --git a/PyLR/Lexers/mathlex.py b/PyLR/Lexers/mathlex.py new file mode 100644 index 00000000..b4a16778 --- /dev/null +++ b/PyLR/Lexers/mathlex.py @@ -0,0 +1,15 @@ +import Lexer, re, string + +def idfunc(m): + return int(m.group(0)) + +class mathlex(Lexer.Lexer): + def __init__(self): + Lexer.Lexer.__init__(self) + self.addpat(r"([1-9]([0-9]+)?)|0", "ID", idfunc) + self.addpat(r"\+", "PLUS") + self.addpat(r"\*","TIMES") + self.addpat(r"\(", "LPAREN") + self.addpat(r"\)", "RPAREN") + self.addpat(r"\s+", "", None, Lexer.SKIPTOK) + diff --git a/PyLR/Makefile b/PyLR/Makefile new file mode 100644 index 00000000..30ff3c3d --- /dev/null +++ b/PyLR/Makefile @@ -0,0 +1,319 @@ +# Generated automatically from Makefile.pre by makesetup. +# Generated automatically from Makefile.pre.in by sedscript. 
+# Universal Unix Makefile for Python extensions +# ============================================= + +# Short Instructions +# ------------------ + +# 1. Build and install Python (1.5 or newer). +# 2. "make -f Makefile.pre.in boot" +# 3. "make" +# You should now have a shared library. + +# Long Instructions +# ----------------- + +# Build *and install* the basic Python 1.5 distribution. See the +# Python README for instructions. (This version of Makefile.pre.in +# only withs with Python 1.5, alpha 3 or newer.) + +# Create a file Setup.in for your extension. This file follows the +# format of the Modules/Setup.in file; see the instructions there. +# For a simple module called "spam" on file "spammodule.c", it can +# contain a single line: +# spam spammodule.c +# You can build as many modules as you want in the same directory -- +# just have a separate line for each of them in the Setup.in file. + +# If you want to build your extension as a shared library, insert a +# line containing just the string +# *shared* +# at the top of your Setup.in file. + +# Note that the build process copies Setup.in to Setup, and then works +# with Setup. It doesn't overwrite Setup when Setup.in is changed, so +# while you're in the process of debugging your Setup.in file, you may +# want to edit Setup instead, and copy it back to Setup.in later. +# (All this is done so you can distribute your extension easily and +# someone else can select the modules they actually want to build by +# commenting out lines in the Setup file, without editing the +# original. Editing Setup is also used to specify nonstandard +# locations for include or library files.) + +# Copy this file (Misc/Makefile.pre.in) to the directory containing +# your extension. + +# Run "make -f Makefile.pre.in boot". This creates Makefile +# (producing Makefile.pre and sedscript as intermediate files) and +# config.c, incorporating the values for sys.prefix, sys.exec_prefix +# and sys.version from the installed Python binary. 
For this to work, +# the python binary must be on your path. If this fails, try +# make -f Makefile.pre.in Makefile VERSION=1.5 installdir= +# where is the prefix used to install Python for installdir +# (and possibly similar for exec_installdir=). + +# Note: "make boot" implies "make clobber" -- it assumes that when you +# bootstrap you may have changed platforms so it removes all previous +# output files. + +# If you are building your extension as a shared library (your +# Setup.in file starts with *shared*), run "make" or "make sharedmods" +# to build the shared library files. If you are building a statically +# linked Python binary (the only solution of your platform doesn't +# support shared libraries, and sometimes handy if you want to +# distribute or install the resulting Python binary), run "make +# python". + +# Note: Each time you edit Makefile.pre.in or Setup, you must run +# "make Makefile" before running "make". + +# Hint: if you want to use VPATH, you can start in an empty +# subdirectory and say (e.g.): +# make -f ../Makefile.pre.in boot srcdir=.. VPATH=.. + + +# === Bootstrap variables (edited through "make boot") === + +# The prefix used by "make inclinstall libainstall" of core python +installdir= /usr + +# The exec_prefix used by the same +exec_installdir=/usr + +# Source directory and VPATH in case you want to use VPATH. +# (You will have to edit these two lines yourself -- there is no +# automatic support as the Makefile is not generated by +# config.status.) +srcdir= . +VPATH= . + +# === Variables that you may want to customize (rarely) === + +# (Static) build target +TARGET= python + +# Installed python binary (used only by boot target) +PYTHON= python + +# Add more -I and -D options here +CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS) + +# These two variables can be set in Setup to merge extensions. +# See example[23]. 
+BASELIB= +BASESETUP= + +# === Variables set by makesetup === + +MODOBJS= +MODLIBS= $(LOCALMODLIBS) $(BASEMODLIBS) + +# === Definitions added by makesetup === + +LOCALMODLIBS= +BASEMODLIBS= +SHAREDMODS= PyLRenginemodule$(SO) +TKPATH=:lib-tk +GLHACK=-Dclear=__GLclear +PYTHONPATH=$(COREPYTHONPATH) +COREPYTHONPATH=$(DESTPATH)$(SITEPATH)$(MACHDEPPATH)$(STDWINPATH)$(TKPATH) +MACHDEPPATH=:plat-$(MACHDEP) +TESTPATH= +SITEPATH= +DESTPATH= +MACHDESTLIB=$(BINLIBDEST) +DESTLIB=$(LIBDEST) + + +# === Variables from configure (through sedscript) === + +VERSION= 1.5 +CC= gcc +LINKCC= $(CC) +SGI_ABI= @SGI_ABI@ +OPT= -g -O2 +LDFLAGS= +DEFS= -DHAVE_CONFIG_H +LIBS= -lieee -ldl -lpthread +LIBM= -lm +LIBC= +RANLIB= ranlib +MACHDEP= linux2 +SO= .so +LDSHARED= gcc -shared -lc +CCSHARED= -fPIC +LINKFORSHARED= -Xlinker -export-dynamic +CCC=g++ + +# Install prefix for architecture-independent files +prefix= /usr + +# Install prefix for architecture-dependent files +exec_prefix= ${prefix} + +# === Fixed definitions === + +# Shell used by make (some versions default to the login shell, which is bad) +SHELL= /bin/sh + +# Expanded directories +BINDIR= $(exec_installdir)/bin +LIBDIR= $(exec_prefix)/lib +MANDIR= $(installdir)/man +INCLUDEDIR= $(installdir)/include +SCRIPTDIR= $(prefix)/lib + +# Detailed destination directories +BINLIBDEST= $(LIBDIR)/python$(VERSION) +LIBDEST= $(SCRIPTDIR)/python$(VERSION) +INCLUDEPY= $(INCLUDEDIR)/python$(VERSION) +EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION) +LIBP= $(exec_installdir)/lib/python$(VERSION) +DESTSHARED= $(BINLIBDEST)/site-packages + +LIBPL= $(LIBP)/config + +PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a + +MAKESETUP= $(LIBPL)/makesetup +MAKEFILE= $(LIBPL)/Makefile +CONFIGC= $(LIBPL)/config.c +CONFIGCIN= $(LIBPL)/config.c.in +SETUP= $(LIBPL)/Setup + +SYSLIBS= $(LIBM) $(LIBC) + +ADDOBJS= $(LIBPL)/python.o config.o + +# Portable install script (configure doesn't always guess right) +INSTALL= $(LIBPL)/install-sh -c +# Shared libraries must 
be installed with executable mode on some systems; +# rather than figuring out exactly which, we always give them executable mode. +# Also, making them read-only seems to be a good idea... +INSTALL_SHARED= ${INSTALL} -m 555 + +# === Fixed rules === + +# Default target. This builds shared libraries only +default: sharedmods + +# Build everything +all: static sharedmods + +# Build shared libraries from our extension modules +sharedmods: $(SHAREDMODS) + +# Build a static Python binary containing our extension modules +static: $(TARGET) +$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB) + $(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \ + $(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \ + -o $(TARGET) + +install: sharedmods + if test ! -d $(DESTSHARED) ; then \ + mkdir $(DESTSHARED) ; else true ; fi + -for i in X $(SHAREDMODS); do \ + if test $$i != X; \ + then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \ + fi; \ + done + +# Build the library containing our extension modules +lib.a: $(MODOBJS) + -rm -f lib.a + ar cr lib.a $(MODOBJS) + -$(RANLIB) lib.a + +# This runs makesetup *twice* to use the BASESETUP definition from Setup +config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP) + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + $(MAKE) -f Makefile do-it-again + +# Internal target to run makesetup for the second time +do-it-again: + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + +# Make config.o from the config.c created by makesetup +config.o: config.c + $(CC) $(CFLAGS) -c config.c + +# Setup is copied from Setup.in *only* if it doesn't yet exist +Setup: + cp $(srcdir)/Setup.in Setup + +# Make the intermediate Makefile.pre from Makefile.pre.in +Makefile.pre: Makefile.pre.in sedscript + sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre + +# Shortcuts to make the sed arguments on one line +P=prefix +E=exec_prefix +H=Generated automatically from Makefile.pre.in by 
sedscript. +L=LINKFORSHARED + +# Make the sed script used to create Makefile.pre from Makefile.pre.in +sedscript: $(MAKEFILE) + sed -n \ + -e '1s/.*/1i\\/p' \ + -e '2s%.*%# $H%p' \ + -e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \ + -e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \ + -e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \ + -e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \ + -e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \ + -e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \ + -e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \ + -e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \ + -e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \ + -e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \ + -e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \ + -e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \ + -e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \ + -e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \ + -e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \ + -e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \ + -e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \ + -e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \ + $(MAKEFILE) >sedscript + echo "/^CCC=g++/d" >>sedscript + echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript + echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript + echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript + echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript + echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript + echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript + echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript + +# Bootstrap target +boot: clobber + VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \ + installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \ + exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \ + $(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \ + VERSION=$$VERSION \ + installdir=$$installdir \ + 
exec_installdir=$$exec_installdir \ + Makefile + +# Handy target to remove intermediate files and backups +clean: + -rm -f *.o *~ + +# Handy target to remove everything that is easily regenerated +clobber: clean + -rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript + -rm -f *.so *.sl so_locations + + +# Handy target to remove everything you don't want to distribute +distclean: clobber + -rm -f Makefile Setup + +# Rules appended by makedepend + +PyLRenginemodule.o: $(srcdir)/PyLRenginemodule.c; $(CC) $(CCSHARED) $(CFLAGS) -c $(srcdir)/PyLRenginemodule.c +PyLRenginemodule$(SO): PyLRenginemodule.o; $(LDSHARED) PyLRenginemodule.o -o PyLRenginemodule$(SO) diff --git a/PyLR/Makefile.pre b/PyLR/Makefile.pre new file mode 100644 index 00000000..6d97a8b9 --- /dev/null +++ b/PyLR/Makefile.pre @@ -0,0 +1,298 @@ +# Generated automatically from Makefile.pre.in by sedscript. +# Universal Unix Makefile for Python extensions +# ============================================= + +# Short Instructions +# ------------------ + +# 1. Build and install Python (1.5 or newer). +# 2. "make -f Makefile.pre.in boot" +# 3. "make" +# You should now have a shared library. + +# Long Instructions +# ----------------- + +# Build *and install* the basic Python 1.5 distribution. See the +# Python README for instructions. (This version of Makefile.pre.in +# only withs with Python 1.5, alpha 3 or newer.) + +# Create a file Setup.in for your extension. This file follows the +# format of the Modules/Setup.in file; see the instructions there. +# For a simple module called "spam" on file "spammodule.c", it can +# contain a single line: +# spam spammodule.c +# You can build as many modules as you want in the same directory -- +# just have a separate line for each of them in the Setup.in file. + +# If you want to build your extension as a shared library, insert a +# line containing just the string +# *shared* +# at the top of your Setup.in file. 
+ +# Note that the build process copies Setup.in to Setup, and then works +# with Setup. It doesn't overwrite Setup when Setup.in is changed, so +# while you're in the process of debugging your Setup.in file, you may +# want to edit Setup instead, and copy it back to Setup.in later. +# (All this is done so you can distribute your extension easily and +# someone else can select the modules they actually want to build by +# commenting out lines in the Setup file, without editing the +# original. Editing Setup is also used to specify nonstandard +# locations for include or library files.) + +# Copy this file (Misc/Makefile.pre.in) to the directory containing +# your extension. + +# Run "make -f Makefile.pre.in boot". This creates Makefile +# (producing Makefile.pre and sedscript as intermediate files) and +# config.c, incorporating the values for sys.prefix, sys.exec_prefix +# and sys.version from the installed Python binary. For this to work, +# the python binary must be on your path. If this fails, try +# make -f Makefile.pre.in Makefile VERSION=1.5 installdir= +# where is the prefix used to install Python for installdir +# (and possibly similar for exec_installdir=). + +# Note: "make boot" implies "make clobber" -- it assumes that when you +# bootstrap you may have changed platforms so it removes all previous +# output files. + +# If you are building your extension as a shared library (your +# Setup.in file starts with *shared*), run "make" or "make sharedmods" +# to build the shared library files. If you are building a statically +# linked Python binary (the only solution of your platform doesn't +# support shared libraries, and sometimes handy if you want to +# distribute or install the resulting Python binary), run "make +# python". + +# Note: Each time you edit Makefile.pre.in or Setup, you must run +# "make Makefile" before running "make". 
+ +# Hint: if you want to use VPATH, you can start in an empty +# subdirectory and say (e.g.): +# make -f ../Makefile.pre.in boot srcdir=.. VPATH=.. + + +# === Bootstrap variables (edited through "make boot") === + +# The prefix used by "make inclinstall libainstall" of core python +installdir= /usr + +# The exec_prefix used by the same +exec_installdir=/usr + +# Source directory and VPATH in case you want to use VPATH. +# (You will have to edit these two lines yourself -- there is no +# automatic support as the Makefile is not generated by +# config.status.) +srcdir= . +VPATH= . + +# === Variables that you may want to customize (rarely) === + +# (Static) build target +TARGET= python + +# Installed python binary (used only by boot target) +PYTHON= python + +# Add more -I and -D options here +CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS) + +# These two variables can be set in Setup to merge extensions. +# See example[23]. +BASELIB= +BASESETUP= + +# === Variables set by makesetup === + +MODOBJS= _MODOBJS_ +MODLIBS= _MODLIBS_ + +# === Definitions added by makesetup === + +# === Variables from configure (through sedscript) === + +VERSION= 1.5 +CC= gcc +LINKCC= $(CC) +SGI_ABI= @SGI_ABI@ +OPT= -g -O2 +LDFLAGS= +DEFS= -DHAVE_CONFIG_H +LIBS= -lieee -ldl -lpthread +LIBM= -lm +LIBC= +RANLIB= ranlib +MACHDEP= linux2 +SO= .so +LDSHARED= gcc -shared -lc +CCSHARED= -fPIC +LINKFORSHARED= -Xlinker -export-dynamic +CCC=g++ + +# Install prefix for architecture-independent files +prefix= /usr + +# Install prefix for architecture-dependent files +exec_prefix= ${prefix} + +# === Fixed definitions === + +# Shell used by make (some versions default to the login shell, which is bad) +SHELL= /bin/sh + +# Expanded directories +BINDIR= $(exec_installdir)/bin +LIBDIR= $(exec_prefix)/lib +MANDIR= $(installdir)/man +INCLUDEDIR= $(installdir)/include +SCRIPTDIR= $(prefix)/lib + +# Detailed destination directories +BINLIBDEST= $(LIBDIR)/python$(VERSION) +LIBDEST= 
$(SCRIPTDIR)/python$(VERSION) +INCLUDEPY= $(INCLUDEDIR)/python$(VERSION) +EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION) +LIBP= $(exec_installdir)/lib/python$(VERSION) +DESTSHARED= $(BINLIBDEST)/site-packages + +LIBPL= $(LIBP)/config + +PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a + +MAKESETUP= $(LIBPL)/makesetup +MAKEFILE= $(LIBPL)/Makefile +CONFIGC= $(LIBPL)/config.c +CONFIGCIN= $(LIBPL)/config.c.in +SETUP= $(LIBPL)/Setup + +SYSLIBS= $(LIBM) $(LIBC) + +ADDOBJS= $(LIBPL)/python.o config.o + +# Portable install script (configure doesn't always guess right) +INSTALL= $(LIBPL)/install-sh -c +# Shared libraries must be installed with executable mode on some systems; +# rather than figuring out exactly which, we always give them executable mode. +# Also, making them read-only seems to be a good idea... +INSTALL_SHARED= ${INSTALL} -m 555 + +# === Fixed rules === + +# Default target. This builds shared libraries only +default: sharedmods + +# Build everything +all: static sharedmods + +# Build shared libraries from our extension modules +sharedmods: $(SHAREDMODS) + +# Build a static Python binary containing our extension modules +static: $(TARGET) +$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB) + $(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \ + $(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \ + -o $(TARGET) + +install: sharedmods + if test ! 
-d $(DESTSHARED) ; then \ + mkdir $(DESTSHARED) ; else true ; fi + -for i in X $(SHAREDMODS); do \ + if test $$i != X; \ + then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \ + fi; \ + done + +# Build the library containing our extension modules +lib.a: $(MODOBJS) + -rm -f lib.a + ar cr lib.a $(MODOBJS) + -$(RANLIB) lib.a + +# This runs makesetup *twice* to use the BASESETUP definition from Setup +config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP) + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + $(MAKE) -f Makefile do-it-again + +# Internal target to run makesetup for the second time +do-it-again: + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + +# Make config.o from the config.c created by makesetup +config.o: config.c + $(CC) $(CFLAGS) -c config.c + +# Setup is copied from Setup.in *only* if it doesn't yet exist +Setup: + cp $(srcdir)/Setup.in Setup + +# Make the intermediate Makefile.pre from Makefile.pre.in +Makefile.pre: Makefile.pre.in sedscript + sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre + +# Shortcuts to make the sed arguments on one line +P=prefix +E=exec_prefix +H=Generated automatically from Makefile.pre.in by sedscript. 
+L=LINKFORSHARED + +# Make the sed script used to create Makefile.pre from Makefile.pre.in +sedscript: $(MAKEFILE) + sed -n \ + -e '1s/.*/1i\\/p' \ + -e '2s%.*%# $H%p' \ + -e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \ + -e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \ + -e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \ + -e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \ + -e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \ + -e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \ + -e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \ + -e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \ + -e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \ + -e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \ + -e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \ + -e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \ + -e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \ + -e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \ + -e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \ + -e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \ + -e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \ + -e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \ + $(MAKEFILE) >sedscript + echo "/^CCC=g++/d" >>sedscript + echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript + echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript + echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript + echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript + echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript + echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript + echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript + +# Bootstrap target +boot: clobber + VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \ + installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \ + exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \ + $(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \ + VERSION=$$VERSION \ + installdir=$$installdir \ + exec_installdir=$$exec_installdir \ + 
Makefile + +# Handy target to remove intermediate files and backups +clean: + -rm -f *.o *~ + +# Handy target to remove everything that is easily regenerated +clobber: clean + -rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript + -rm -f *.so *.sl so_locations + + +# Handy target to remove everything you don't want to distribute +distclean: clobber + -rm -f Makefile Setup diff --git a/PyLR/Makefile.pre.in b/PyLR/Makefile.pre.in new file mode 100644 index 00000000..b38a95dd --- /dev/null +++ b/PyLR/Makefile.pre.in @@ -0,0 +1,297 @@ +# Universal Unix Makefile for Python extensions +# ============================================= + +# Short Instructions +# ------------------ + +# 1. Build and install Python (1.5 or newer). +# 2. "make -f Makefile.pre.in boot" +# 3. "make" +# You should now have a shared library. + +# Long Instructions +# ----------------- + +# Build *and install* the basic Python 1.5 distribution. See the +# Python README for instructions. (This version of Makefile.pre.in +# only withs with Python 1.5, alpha 3 or newer.) + +# Create a file Setup.in for your extension. This file follows the +# format of the Modules/Setup.in file; see the instructions there. +# For a simple module called "spam" on file "spammodule.c", it can +# contain a single line: +# spam spammodule.c +# You can build as many modules as you want in the same directory -- +# just have a separate line for each of them in the Setup.in file. + +# If you want to build your extension as a shared library, insert a +# line containing just the string +# *shared* +# at the top of your Setup.in file. + +# Note that the build process copies Setup.in to Setup, and then works +# with Setup. It doesn't overwrite Setup when Setup.in is changed, so +# while you're in the process of debugging your Setup.in file, you may +# want to edit Setup instead, and copy it back to Setup.in later. 
+# (All this is done so you can distribute your extension easily and +# someone else can select the modules they actually want to build by +# commenting out lines in the Setup file, without editing the +# original. Editing Setup is also used to specify nonstandard +# locations for include or library files.) + +# Copy this file (Misc/Makefile.pre.in) to the directory containing +# your extension. + +# Run "make -f Makefile.pre.in boot". This creates Makefile +# (producing Makefile.pre and sedscript as intermediate files) and +# config.c, incorporating the values for sys.prefix, sys.exec_prefix +# and sys.version from the installed Python binary. For this to work, +# the python binary must be on your path. If this fails, try +# make -f Makefile.pre.in Makefile VERSION=1.5 installdir= +# where is the prefix used to install Python for installdir +# (and possibly similar for exec_installdir=). + +# Note: "make boot" implies "make clobber" -- it assumes that when you +# bootstrap you may have changed platforms so it removes all previous +# output files. + +# If you are building your extension as a shared library (your +# Setup.in file starts with *shared*), run "make" or "make sharedmods" +# to build the shared library files. If you are building a statically +# linked Python binary (the only solution of your platform doesn't +# support shared libraries, and sometimes handy if you want to +# distribute or install the resulting Python binary), run "make +# python". + +# Note: Each time you edit Makefile.pre.in or Setup, you must run +# "make Makefile" before running "make". + +# Hint: if you want to use VPATH, you can start in an empty +# subdirectory and say (e.g.): +# make -f ../Makefile.pre.in boot srcdir=.. VPATH=.. 
+ + +# === Bootstrap variables (edited through "make boot") === + +# The prefix used by "make inclinstall libainstall" of core python +installdir= /usr/local + +# The exec_prefix used by the same +exec_installdir=$(installdir) + +# Source directory and VPATH in case you want to use VPATH. +# (You will have to edit these two lines yourself -- there is no +# automatic support as the Makefile is not generated by +# config.status.) +srcdir= . +VPATH= . + +# === Variables that you may want to customize (rarely) === + +# (Static) build target +TARGET= python + +# Installed python binary (used only by boot target) +PYTHON= python + +# Add more -I and -D options here +CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS) + +# These two variables can be set in Setup to merge extensions. +# See example[23]. +BASELIB= +BASESETUP= + +# === Variables set by makesetup === + +MODOBJS= _MODOBJS_ +MODLIBS= _MODLIBS_ + +# === Definitions added by makesetup === + +# === Variables from configure (through sedscript) === + +VERSION= @VERSION@ +CC= @CC@ +LINKCC= @LINKCC@ +SGI_ABI= @SGI_ABI@ +OPT= @OPT@ +LDFLAGS= @LDFLAGS@ +DEFS= @DEFS@ +LIBS= @LIBS@ +LIBM= @LIBM@ +LIBC= @LIBC@ +RANLIB= @RANLIB@ +MACHDEP= @MACHDEP@ +SO= @SO@ +LDSHARED= @LDSHARED@ +CCSHARED= @CCSHARED@ +LINKFORSHARED= @LINKFORSHARED@ +#@SET_CCC@ + +# Install prefix for architecture-independent files +prefix= /usr/local + +# Install prefix for architecture-dependent files +exec_prefix= $(prefix) + +# === Fixed definitions === + +# Shell used by make (some versions default to the login shell, which is bad) +SHELL= /bin/sh + +# Expanded directories +BINDIR= $(exec_installdir)/bin +LIBDIR= $(exec_prefix)/lib +MANDIR= $(installdir)/man +INCLUDEDIR= $(installdir)/include +SCRIPTDIR= $(prefix)/lib + +# Detailed destination directories +BINLIBDEST= $(LIBDIR)/python$(VERSION) +LIBDEST= $(SCRIPTDIR)/python$(VERSION) +INCLUDEPY= $(INCLUDEDIR)/python$(VERSION) +EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION) +LIBP= 
$(exec_installdir)/lib/python$(VERSION) +DESTSHARED= $(BINLIBDEST)/site-packages + +LIBPL= $(LIBP)/config + +PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a + +MAKESETUP= $(LIBPL)/makesetup +MAKEFILE= $(LIBPL)/Makefile +CONFIGC= $(LIBPL)/config.c +CONFIGCIN= $(LIBPL)/config.c.in +SETUP= $(LIBPL)/Setup + +SYSLIBS= $(LIBM) $(LIBC) + +ADDOBJS= $(LIBPL)/python.o config.o + +# Portable install script (configure doesn't always guess right) +INSTALL= $(LIBPL)/install-sh -c +# Shared libraries must be installed with executable mode on some systems; +# rather than figuring out exactly which, we always give them executable mode. +# Also, making them read-only seems to be a good idea... +INSTALL_SHARED= ${INSTALL} -m 555 + +# === Fixed rules === + +# Default target. This builds shared libraries only +default: sharedmods + +# Build everything +all: static sharedmods + +# Build shared libraries from our extension modules +sharedmods: $(SHAREDMODS) + +# Build a static Python binary containing our extension modules +static: $(TARGET) +$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB) + $(CC) $(LDFLAGS) $(ADDOBJS) lib.a $(PYTHONLIBS) \ + $(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \ + -o $(TARGET) + +install: sharedmods + if test ! 
-d $(DESTSHARED) ; then \ + mkdir $(DESTSHARED) ; else true ; fi + -for i in X $(SHAREDMODS); do \ + if test $$i != X; \ + then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \ + fi; \ + done + +# Build the library containing our extension modules +lib.a: $(MODOBJS) + -rm -f lib.a + ar cr lib.a $(MODOBJS) + -$(RANLIB) lib.a + +# This runs makesetup *twice* to use the BASESETUP definition from Setup +config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP) + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + $(MAKE) -f Makefile do-it-again + +# Internal target to run makesetup for the second time +do-it-again: + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + +# Make config.o from the config.c created by makesetup +config.o: config.c + $(CC) $(CFLAGS) -c config.c + +# Setup is copied from Setup.in *only* if it doesn't yet exist +Setup: + cp $(srcdir)/Setup.in Setup + +# Make the intermediate Makefile.pre from Makefile.pre.in +Makefile.pre: Makefile.pre.in sedscript + sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre + +# Shortcuts to make the sed arguments on one line +P=prefix +E=exec_prefix +H=Generated automatically from Makefile.pre.in by sedscript. 
+L=LINKFORSHARED + +# Make the sed script used to create Makefile.pre from Makefile.pre.in +sedscript: $(MAKEFILE) + sed -n \ + -e '1s/.*/1i\\/p' \ + -e '2s%.*%# $H%p' \ + -e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \ + -e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \ + -e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \ + -e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \ + -e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \ + -e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \ + -e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \ + -e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \ + -e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \ + -e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \ + -e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \ + -e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \ + -e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \ + -e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \ + -e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \ + -e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \ + -e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \ + -e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \ + $(MAKEFILE) >sedscript + echo "/^#@SET_CCC@/d" >>sedscript + echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript + echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript + echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript + echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript + echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript + echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript + echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript + +# Bootstrap target +boot: clobber + VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \ + installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \ + exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \ + $(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \ + VERSION=$$VERSION \ + installdir=$$installdir \ + exec_installdir=$$exec_installdir \ 
+ Makefile + +# Handy target to remove intermediate files and backups +clean: + -rm -f *.o *~ + +# Handy target to remove everything that is easily regenerated +clobber: clean + -rm -f *.a tags TAGS config.c Makefile.pre $(TARGET) sedscript + -rm -f *.so *.sl so_locations + + +# Handy target to remove everything you don't want to distribute +distclean: clobber + -rm -f Makefile Setup diff --git a/PyLR/Parser.py b/PyLR/Parser.py new file mode 100644 index 00000000..38a81885 --- /dev/null +++ b/PyLR/Parser.py @@ -0,0 +1,45 @@ + +__version__ = "$Id$" + +import PyLRengine + + +class Parser: + + def __init__(self, lexer, actiontable, gototable, prodinfo): + self.lexer = lexer + self.actions = actiontable + self.gotos = gototable + # get the function from the function name + # if we forgot to supply a function we get an AttributeError here + try: self.prodinfo = map(lambda x,s=self: (x[0], getattr(s, x[1]), x[2]), + prodinfo) + except AttributeError: + sys.stderr.write("Parser: error: forgot to supply a parser function\n") + raise + self.engine = None + + # the unspecified function (the default for all productions) + def unspecified(*args): + return args[1] + + def initengine(self, dodel=0): + self.engine = PyLRengine.NewEngine(self.prodinfo, self.actions, self.gotos) + if dodel: + self.actions = [] + self.gotos = [] + self.prodinfo = [] + + def parse(self, text, verbose=0): + self.initengine() + self.lexer.settext(text) + while 1: + tok, val = self.lexer.scan(verbose) + if not self.engine.parse(tok, val, verbose): + break + # need to add a method to the engine to + # return the final value + # and return that here + return None + + diff --git a/PyLR/Parsers/GrammarParser.py b/PyLR/Parsers/GrammarParser.py new file mode 100644 index 00000000..265cd48a --- /dev/null +++ b/PyLR/Parsers/GrammarParser.py @@ -0,0 +1,169 @@ +""" + ./Parsers/GrammarParser.py -- created Wed Feb 23 15:23:44 2000 + +This file was automatically generated by the PyLR parser generator. 
+It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These +tables are used to give functionality to a parsing engine. It also defines +A Parser class called GrammarParser which will use this engine. It's usage +is indicated in GrammarParser's doc-string. +""" +# +# this section contains source code added by the user +# plus 'import PyLR' +# + +import PyLR + +# +# the action table ('s', 4) means shift to state 4, +# ('r', 4) means reduce by production number 4 +# other entries are errors. each row represents a state +# and each column a terminal lookahead symbol (excluding symbols with +# Lexer.SKIPTOK). +# Lexer symbols are: +# ['EOF', 'LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', '', ''] +# +_actiontable = [ + [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)], + [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)], + [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)], + [('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)], + [('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)], + [('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', 
-1), ('', -1), ('r', 6), ('', -1)], + [('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)], + [('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), 
('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)] +] + + + +# +# the goto table, each row represents a state +# and each column, the nonterminal that was on the lhs of the +# reduction +# +_gototable = [ + [1, 2, 3, 9, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, 4, None, 8, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, 6, 14, None, None, None, None], + [None, None, None, None, None, 13, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, 17, 19, 22, 26], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, 21, 22, 26], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, 
None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None] +] + + + +# +# This is the prodinfo table. each row represents a production +# the entries are the length of the production, the name of a method +# in an instance of the GrammarParser class below that gets called +# when that production occurs, and the index of the lhs in the +# nonterminals (as in # the gototable) +# +_prodinfo = [ + (1, 'unspecified', 0), # pspec: gspec (unspecified) + (2, 'unspecified', 0), # pspec: pydefs gspec (unspecified) + (3, 'unspecified', 1), # gspec: 10 lhsdeflist 10 (unspecified) + (2, 'unspecified', 2), # pydefs: pydefs pydef (unspecified) + (1, 'unspecified', 2), # pydefs: pydef (unspecified) + (1, 'lexdef', 3), # pydef: 1 (lexdef) + (1, 'addcode', 3), # pydef: 2 (addcode) + (1, 'classname', 3), # pydef: 3 (classname) + (2, 'unspecified', 4), # lhsdeflist: lhsdeflist lhsdef (unspecified) + (1, 'unspecified', 4), # lhsdeflist: lhsdef (unspecified) + (4, 'lhsdef', 5), # lhsdef: 4 5 rhslist 6 (lhsdef) + (1, 'singletolist', 6), # rhslist: rhs (singletolist) + (3, 'rhslist_OR_rhs', 6), # rhslist: rhslist 7 rhs (rhslist_OR_rhs) + (1, 'rhs_idlist', 7), # rhs: rhsidlist (rhs_idlist) + (4, 'rhs_idlist_func', 7), # rhs: rhsidlist 8 4 9 (rhs_idlist_func) + (1, 'unspecified', 8), # rhsidlist: idlist (unspecified) + (0, 'rhseps', 8), # rhsidlist: (rhseps) + (2, 'idl_idlistID', 9), # idlist: idlist 4 (idl_idlistID) + (1, 'idlistID', 9), # idlist: 4 (idlistID) + ] + + + + +class GrammarParser(PyLR.Parser.Parser): + """ + this class was produced automatically by the PyLR parser generator. 
+ It is meant to be subclassed to produce a parser for the grammar + +pspec: gspec (unspecified); +pspec: pydefs gspec (unspecified); +gspec: GDEL lhsdeflist GDEL (unspecified); +pydefs: pydefs pydef (unspecified); +pydefs: pydef (unspecified); +pydef: LEX (lexdef); +pydef: CODE (addcode); +pydef: CLASS (classname); +lhsdeflist: lhsdeflist lhsdef (unspecified); +lhsdeflist: lhsdef (unspecified); +lhsdef: ID COLON rhslist SCOLON (lhsdef); +rhslist: rhs (singletolist); +rhslist: rhslist OR rhs (rhslist_OR_rhs); +rhs: rhsidlist (rhs_idlist); +rhs: rhsidlist LPAREN ID RPAREN (rhs_idlist_func); +rhsidlist: idlist (unspecified); +rhsidlist: (rhseps); +idlist: idlist ID (idl_idlistID); +idlist: ID (idlistID); + + While parsing input, if one of the above productions is recognized, + a method of your sub-class (whose name is indicated in parens to the + right) will be invoked. Names marked 'unspecified' should be ignored. + + usage: + +class MyGrammarParser(GrammarParser): + # ...define the methods for the productions... + +p = MyGrammarParser(); p.parse(text) + """ + def __init__(self): + lexer = PyLR.Lexers.GrammarLex() + PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo) diff --git a/PyLR/Parsers/__init__.py b/PyLR/Parsers/__init__.py new file mode 100644 index 00000000..b920d19a --- /dev/null +++ b/PyLR/Parsers/__init__.py @@ -0,0 +1,7 @@ +"""if you want to make parsers available from this package directly, +that is, if you want 'from PyLR.Parsers import RandomParser' to +work, import the name here +""" + +from GrammarParser import GrammarParser + diff --git a/PyLR/Parsers/gram.py b/PyLR/Parsers/gram.py new file mode 100644 index 00000000..d852517b --- /dev/null +++ b/PyLR/Parsers/gram.py @@ -0,0 +1,170 @@ +""" + out -- created Tue Dec 16 00:30:36 1997 + +This file was automatically generated by the PyLR parser generator. +It defines the tables 'actiontable', 'gototable', and 'prodinfo'. 
These +tables are used to give functionality to a parsing engine. It also defines +A Parser class called GrammarParser which will use this engine. It's Usage is +indicated in GrammarParser's doc-string. +""" +# +# this section contains source code added by the user +# plus 'import PyLR' +# + +import PyLR.Lexers +import PyLR.Parser +import PyLR + +# +# the action table ('s', 4) means shift to state 4, +# ('r', 4) means reduce by production number 4 +# other entries are errors. each row represents a state +# and each column a terminal lookahead symbol (plus EOF) +# these symbols are ['LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', 'EOF'] +# +_actiontable = [ + [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)], + [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)], + [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)], + [('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)], + [('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)], + [('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)], + [('r', 7), ('r', 7), ('r', 7), 
('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)], + [('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 
19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)] +] + + + +# +# the goto table, each row represents a state +# and each column, the nonterminal that was on the lhs of the +# reduction +# +_gototable = [ + [1, 2, 3, 9, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, 4, None, 8, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, 6, 14, None, None, None, None], + [None, None, None, None, None, 13, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, 17, 19, 22, 26], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, 21, 22, 26], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, 
None, None, None, None, None] +] + + + +# +# This is the prodinfo table. each row represents a production +# the entries are the length of the production, the name of a method +# in an instance of the GrammarParser class below that gets called +# when that production occurs, and the index of the lhs in the +# nonterminals (as in # the gototable) +# +_prodinfo = [ + (1, 'unspecified', 0), # pspec -> ['gspec'] + (2, 'unspecified', 0), # pspec -> ['pydefs', 'gspec'] + (3, 'unspecified', 1), # gspec -> ['GDEL', 'lhsdeflist', 'GDEL'] + (2, 'unspecified', 2), # pydefs -> ['pydefs', 'pydef'] + (1, 'unspecified', 2), # pydefs -> ['pydef'] + (1, 'lexdef', 3), # pydef -> ['LEX'] + (1, 'addcode', 3), # pydef -> ['CODE'] + (1, 'classname', 3), # pydef -> ['CLASS'] + (2, 'unspecified', 4), # lhsdeflist -> ['lhsdeflist', 'lhsdef'] + (1, 'unspecified', 4), # lhsdeflist -> ['lhsdef'] + (4, 'lhsdef', 5), # lhsdef -> ['ID', 'COLON', 'rhslist', 'SCOLON'] + (1, 'singletolist', 6), # rhslist -> ['rhs'] + (3, 'rhslist_OR_rhs', 6), # rhslist -> ['rhslist', 'OR', 'rhs'] + (1, 'rhs_idlist', 7), # rhs -> ['rhsidlist'] + (4, 'rhs_idlist_func', 7), # rhs -> ['rhsidlist', 'LPAREN', 'ID', 'RPAREN'] + (1, 'unspecified', 8), # rhsidlist -> ['idlist'] + (0, 'rhseps', 8), # rhsidlist -> [] + (2, 'idl_idlistID', 9), # idlist -> ['idlist', 'ID'] + (1, 'idlistID', 9), # idlist -> ['ID'] + ] + + + + +class GrammarParser (PyLR.Parser.Parser): + """ + this class was produced automatically by the PyLR parser generator. 
+ It is meant to be subclassed to produce a parser for the grammar + +pspec -> gspec (unspecified) + | pydefs gspec; (unspecified) +gspec -> GDEL lhsdeflist GDEL; (unspecified) +pydefs -> pydefs pydef (unspecified) + | pydef; (unspecified) +pydef -> LEX (lexdef) + | CODE (addcode) + | CLASS; (classname) +lhsdeflist -> lhsdeflist lhsdef (unspecified) + | lhsdef; (unspecified) +lhsdef -> ID COLON rhslist SCOLON; (lhsdef) +rhslist -> rhs (singletolist) + | rhslist OR rhs; (rhslist_OR_rhs) +rhs -> rhsidlist (rhs_idlist) + | rhsidlist LPAREN ID RPAREN; (rhs_idlist_func) +rhsidlist -> idlist (unspecified) + | ; (rhseps) +idlist -> idlist ID (idl_idlistID) + | ID; (idlistID) + + While parsing input, if one of the above productions is recognized, + a method of your sub-class (whose name is indicated in parens to the + right) will be invoked. Names marked 'unspecified' should be ignored. + + usage: + +class MyGrammarParser(GrammarParser): + # ...define the methods for the productions... + +p = MyGrammarParser(); p.parse(text) + """ + + def __init__(self): + lexer = PyLR.Lexers.GrammarLex() + PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo) diff --git a/PyLR/PyLRengine.h b/PyLR/PyLRengine.h new file mode 100644 index 00000000..412a30bb --- /dev/null +++ b/PyLR/PyLRengine.h @@ -0,0 +1,81 @@ +#ifndef Py_PYLRENGINE_H +#define Py_PYLRENGINE_H +#ifdef __cplusplus +extern "C" { +#endif + +#define EOBUF -1 + +struct inbufdatum { + PyObject* pylrval; + int tok; +}; + +struct inbufdata { + struct inbufdatum** chunk; + struct inbufdata* next; +}; + +typedef struct inbuf_struct { + struct inbufdata* data; + int bi; + int nextinput; + int chunksize; +} inbuftype; + +struct stackdatum { + int state; + int tok; + PyObject* pylrval; +}; + +struct stackdata { + struct stackdatum** bucket; + struct stackdata* next; +}; + +typedef struct stack_struct { + struct stackdata* data; + int si; + int chunksize; +} stacktype; + +typedef struct prodinfo_struct { + int len; 
+ PyObject* func; + int lhsi; +} prodinfo_type; + +typedef struct actionstruct{ + int arg; + short act; +} actiontype; + +/*********************************************************************** + * the possible values of the action table + ***********************************************************************/ + +#define SHIFT 's' +#define REDUCE 'r' +#define ACCEPT 'a' + +typedef struct { + PyObject_HEAD + inbuftype* inbuf; + stacktype* stack; + prodinfo_type** prodinfo; + int prodinfosize; + int** gototable; + int goto_x; + int goto_y; + actiontype*** actiontable; + int act_x; + int act_y; + int toksadded; +} parserobject; + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYLRENGINE_H */ diff --git a/PyLR/PyLRenginemodule.c b/PyLR/PyLRenginemodule.c new file mode 100644 index 00000000..d7332560 --- /dev/null +++ b/PyLR/PyLRenginemodule.c @@ -0,0 +1,717 @@ +/*********************************************************************** + * This file defines an ParseEngine (LR), It references a Parsing table + * that is defined in python. + * + * This defines a new type object in Python, called a Parser. It has + * 3 methods, .parse(int: token, char *: text), + * of them). .setaction(production), and .getaction(production). + * + * $Id$ + * + ***********************************************************************/ + +#include +#include +#include "Python.h" +#include "PyLRengine.h" + +/*********************************************************************** + * PyLRengine Error things + ***********************************************************************/ +static PyObject* PyLRParseError; + +#define CHECK_MALLOC(obj) \ + if (!(obj = (PyObject *) malloc (sizeof(PyObject)))) { \ + PyErr_SetString(PyExc_MemoryError, "no more memory"); \ + return NULL; \ + } + +#define onError(message) \ +{ PyErr_SetString(PyExc_ParseError, message); return NULL; } + + + +/*********************************************************************** + * The engines input buffer. 
has a chunksize controllable from within + * python. functions are initinbufdata, init_inbuf, incbi, add2buf, + * gettoken, petpylrval, dumpinbuf + ***********************************************************************/ + +static struct inbufdata * init_inbufdata(chunksize) + int chunksize; +{ + struct inbufdata * ibd; + int i; + + if ((ibd = (struct inbufdata *) malloc(sizeof(struct inbufdata))) == NULL) { + printf("No more Memory!\n"); + exit(1); + } + if ((ibd->chunk = (struct inbufdatum **) malloc(sizeof(struct inbufdatum *) * chunksize)) == NULL) { + printf("No more Memory!\n"); + exit(1); + } + for (i=0; ichunk[i] = (struct inbufdatum *) malloc(sizeof(struct inbufdatum))) == NULL) { + onError("Memory"); + } + ibd->chunk[i]->tok = EOBUF; + ibd->chunk[i]->pylrval = NULL; + } + ibd->next = NULL; + return ibd; +} + +static inbuftype * init_inbuf(chunksize) + int chunksize; +{ + inbuftype * ib; + if ((ib = (inbuftype *)malloc(sizeof(inbuftype))) == NULL) { + printf("No more Memory!\n"); + exit(1); + } + ib->bi = 0; + ib->data = init_inbufdata(chunksize); + ib->chunksize = chunksize; + ib->nextinput = 0; + return ib; +} + +static void incbi(inbuf) + inbuftype * inbuf; +{ + struct inbufdata * tmpdata; + if ((! ((inbuf->bi + 1) % inbuf->chunksize)) && (inbuf->bi != 0)) { + tmpdata = inbuf->data->next; + free(inbuf->data); + inbuf->data = tmpdata; + } + inbuf->bi++; +} + +static void add2buf(inbuf, tok, pylrval) + inbuftype * inbuf; int tok; PyObject * pylrval; +{ + struct inbufdata * orgibd = inbuf->data; + struct inbufdata * newibd; + while(inbuf->data->next != NULL) + inbuf->data = inbuf->data->next; + if ((! 
(inbuf->nextinput % inbuf->chunksize)) && (inbuf->nextinput != 0)) { /* make new chunk at end */ + newibd = init_inbufdata(inbuf->chunksize); + newibd->chunk[0]->tok = tok; + newibd->chunk[0]->pylrval = pylrval; + inbuf->data->next = newibd; + } else { + inbuf->data->chunk[(inbuf->nextinput % inbuf->chunksize)]->tok = tok; + inbuf->data->chunk[(inbuf->nextinput % inbuf->chunksize)]->pylrval = pylrval; + } + inbuf->nextinput++; + inbuf->data = orgibd; +} + + +#define gettoken(ib) ((ib)->data->chunk[ (ib)->bi % (ib)->chunksize]->tok) +#define getpylrval(ib) ((ib)->data->chunk[ (ib)->bi % (ib)->chunksize]->pylrval) + +static void dumpinbuf(inbuf) + + inbuftype* inbuf; +{ + int i, j; + struct inbufdata * orgibd = inbuf->data; + printf ("inbuf at %p with bi at %d and chunksize of %d and nextinput at %d:\n", inbuf, \ + inbuf->bi, inbuf->chunksize, inbuf->nextinput); + j = 0; + for (inbuf->data; inbuf->data != NULL; inbuf->data = inbuf->data->next) { + printf("\tchunk %d:\n", j); + for (i=0; i < inbuf->chunksize; i++) { + printf("\t\tchunk[%d]->tok = %d; pylrval at %p\n", + i, + inbuf->data->chunk[i]->tok, + inbuf->data->chunk[i]->pylrval); + } + j++; + } + inbuf->data = orgibd; +} + +/*********************************************************************** + * the Stack + ***********************************************************************/ + +static stacktype * init_stack (stackchunksize) + int stackchunksize; +{ + stacktype * newstack; + if (( newstack = (stacktype *) malloc(sizeof(stacktype))) == NULL) { + PyErr_SetString(PyLRengineError, "Memory Error"); + return NULL; + } + newstack->si = 0; + newstack->data = NULL; + newstack->chunksize = stackchunksize; + return newstack; +} + + +static struct stackdata * init_stackdata (stackchunksize) + int stackchunksize; +{ + struct stackdata * newstackdata; + int i; + + if ((newstackdata = (struct stackdata *) malloc (sizeof (struct stackdata))) == NULL) { + PyErr_SetString(PyLRengineError, "Memory Error"); + return NULL; + 
} + if ((newstackdata->bucket = (struct stackdatum **) malloc (sizeof (struct stackdatum *) * stackchunksize)) == NULL) { + PyErr_SetString(PyLRengineError, "Memory Error"); + return NULL; + } + for (i=0; i < stackchunksize; i++) { + if ((newstackdata->bucket[i] = (struct stackdatum *) malloc(sizeof (struct stackdatum))) == NULL) { + onError("Memory Error"); + } + newstackdata->bucket[i]->state = -1; + newstackdata->bucket[i]->tok = -1; + newstackdata->bucket[i]->pylrval = NULL; + } + newstackdata->next = NULL; + return newstackdata; +} + + +static void push (stack, token, state, pylrval) + stacktype * stack; + int token; + int state; + PyObject * pylrval; +{ + struct stackdata *newstackdata; + if (! (stack->si % stack->chunksize)) { + newstackdata = init_stackdata(stack->chunksize); + newstackdata->bucket[0]->tok = token; + newstackdata->bucket[0]->state = state; + newstackdata->bucket[0]->pylrval = pylrval; + newstackdata->next = stack->data; + stack->data = newstackdata; + } else { + stack->data->bucket[stack->si % stack->chunksize]->tok = token; + stack->data->bucket[stack->si % stack->chunksize]->state = state; + stack->data->bucket[stack->si % stack->chunksize]->pylrval = pylrval; + } + Py_XINCREF(pylrval); + stack->si++; +} + +static void show_stack(stack) + struct stack_struct * stack; +{ + struct stackdata * orgstackdata; + int i; + orgstackdata = stack->data; + printf("stack at %p:\n", stack); + for (stack->data; stack->data != NULL; stack->data = stack->data->next) { + printf("stack->data at %p\n", stack->data); + for (i=0; ichunksize; i++) { + printf ("stack->data->bucket[%d] = (%d, %d, %p)\n", + i, + stack->data->bucket[i]->tok, + stack->data->bucket[i]->state, + stack->data->bucket[i]->pylrval); + } + } + stack->data = orgstackdata; +} + + +/*********************************************************************** + * This function returns the python objects stored on the stack so that + * they can then be passed to the appropriate function (popping the 
stack + * only occurs when a reduce operation is called, so the python objects + * returned get passed to the function associated with the production that + * is associated with popping items from the stack. see the method parser_parse + * for how this works in more detail + ***********************************************************************/ + +static PyObject ** pop(stack, amt) + stacktype * stack; + int amt; +{ + struct stackdata * tmpsd; + PyObject ** popped_pylrvals; + int c = 0; + if (amt == 0) + return NULL; + if ((popped_pylrvals = (PyObject **)malloc(sizeof(PyObject *) * amt)) == NULL) + onError("Memory Error"); + if (stack->si < amt) { + PyErr_SetString(PyLRengineError, "popping too much from stack!!!"); + return 0; + } + while (amt > 0 && stack->si >= 0) { + if ((popped_pylrvals[c] = (PyObject *)malloc(sizeof(PyObject))) == NULL) + onError("Memory Error"); + if ((stack->si - 1) % stack->chunksize) { + stack->data->bucket[(stack->si -1) % stack->chunksize]->tok = -1; + stack->data->bucket[(stack->si -1) % stack->chunksize]->state = -1; + popped_pylrvals[c] = stack->data->bucket[(stack->si -1) % stack->chunksize]->pylrval; + stack->data->bucket[(stack->si -1) % stack->chunksize]->pylrval = NULL; + } else { + stack->data->bucket[0]->tok = -1; + stack->data->bucket[0]->state = -1; + popped_pylrvals[c] = stack->data->bucket[0]->pylrval; + stack->data->bucket[0]->pylrval = NULL; + tmpsd = stack->data->next; + free(stack->data); + stack->data = tmpsd; + } + amt--; stack->si--; c++; /* not quite ;) */ + } + return popped_pylrvals; +} + +#define stackstate(stack) \ +(((stack)->data == NULL)?\ + 0:\ + (stack)->data->bucket[((stack)->si - 1) % (stack)->chunksize]->state) + + +/*********************************************************************** + * Production Info related functions + ***********************************************************************/ + +static prodinfo_type ** Py_prodinfo2prodinfo (parserobj, py_prodinfo) + parserobject * parserobj; + 
PyObject * py_prodinfo; +{ + prodinfo_type ** prodinfo; + PyObject * prodtuple; + int listsize; + register int listi; + listsize = PyList_Size(py_prodinfo); + if (listsize == -1) + onError("production info table is not a list!"); + parserobj->prodinfosize = listsize; + if ((prodinfo = (prodinfo_type **) malloc (sizeof (prodinfo_type *) * listsize)) == NULL) + onError("No more Mem!"); + for (listi=0; listi < listsize; listi++) { + if ((prodinfo[listi] = (prodinfo_type *) malloc (sizeof(prodinfo_type))) == NULL) + onError("Memory"); + prodtuple = PyList_GetItem(py_prodinfo, listi); + if (! PyTuple_Check(prodtuple)) + onError("Corrput Prodinfo table, must contain tuples of (len, callable)"); + prodinfo[listi]->len = (short int) PyInt_AsLong(PyTuple_GetItem(prodtuple, 0)); + if ((prodinfo[listi]->func = (PyObject *) malloc (sizeof(PyObject))) == NULL) + onError("Memory"); + prodinfo[listi]->func = PyTuple_GetItem(prodtuple, 1); + prodinfo[listi]->lhsi = (int) PyInt_AsLong(PyTuple_GetItem(prodtuple, 2)); + if ((! 
PyCallable_Check(prodinfo[listi]->func)) && (prodinfo[listi]->func != Py_None)) + onError("corrupt prodinfo data, must contain tuples of (len, callable)"); + Py_XINCREF(prodinfo[listi]->func); + } + return prodinfo; +} + +static PyObject * prodinfo2Py_prodinfo(prodinfo, sz) + prodinfo_type ** prodinfo; + int sz; +{ + int i; + PyObject * list; + PyObject * tuple; + PyObject * len; + PyObject * func; + PyObject * lhsi; + list = PyList_New(sz); + for (i=0; ilen); + lhsi = Py_BuildValue("i", prodinfo[i]->lhsi); + func = prodinfo[i]->func; + PyTuple_SetItem(tuple, 0, len); + PyTuple_SetItem(tuple, 1, func); + PyTuple_SetItem(tuple, 2, lhsi); + PyList_SetItem(list, i, tuple); + } + return list; +} + +/*********************************************************************** + * the goto table, show and set routines + ***********************************************************************/ + +#define GOTOERR -1 + +static void * mkgototable(parser, pygotos) + parserobject * parser; + PyObject * pygotos; +{ + register int outerlen; + register int outerct; + register int innerlen; + register int innerct; + int ** gotos; + PyObject * innerlist; + PyObject * py_entry; + outerlen = PyList_Size(pygotos); + parser->goto_x = 0; + parser->goto_y = 0; + parser->gototable = NULL; + if (outerlen == -1) + onError("goto table must be a list of lists!"); + if ((gotos = (int **) malloc(sizeof(int *) * outerlen)) == NULL) + onError("Memory Error"); + for (outerct = 0; outerct < outerlen; outerct++) { + innerlist = PyList_GetItem(pygotos, outerct); + innerlen = PyList_Size(innerlist); + if (innerlen == -1) + onError ("goto table must be a list of lists!"); + if ((gotos[outerct] = (int *) malloc (sizeof(int) * innerlen)) == NULL) + onError("Memory Error"); + for (innerct = 0; innerct < innerlen; innerct++) { + py_entry = PyList_GetItem(innerlist, innerct); + if ((! 
PyInt_Check( py_entry)) && (py_entry != Py_None)) + onError("goto table must be a list of list of either ints or None!"); + if (py_entry == Py_None) { + gotos[outerct][innerct] = GOTOERR; + } + else { + gotos[outerct][innerct] = (int) PyInt_AsLong(py_entry); + } + } + } + parser->goto_x = outerlen; + parser->goto_y = innerlen; + parser->gototable = gotos; +} + + +static PyObject * show_gotos(self, args) + parserobject * self; + PyObject * args; +{ + register int x; + register int y; + for (x=0; x < self->goto_x; x++) { + for (y=0; y < self->goto_y; y++) { + printf("%d ", self->gototable[x][y]); + } + printf ("\n"); + } + Py_INCREF(Py_None); + return Py_None; +} + + + + +/*********************************************************************** + * Action Table set and show + ***********************************************************************/ +#define ACTERR -1 + +static void * mkactiontable(parser, pyactions) + parserobject * parser; PyObject * pyactions; +{ + register int outerlen; + register int outerct; + register int innerlen; + register int innerct; + actiontype *** actions; + PyObject * innerlist; + PyObject * py_tuple; + PyObject * py_act; + char * cact; + PyObject * py_arg; + int tuplelen; + parser->act_x = 0; + parser->act_y = 0; + parser->actiontable = NULL; + outerlen = PyList_Size(pyactions); + if (outerlen == -1) + onError("goto table must be a list of lists!"); + if ((actions = (actiontype ***) malloc(sizeof(actiontype *) * outerlen)) == NULL) + onError("Memory Error"); + for (outerct = 0; outerct < outerlen; outerct++) { + innerlist = PyList_GetItem(pyactions, outerct); + innerlen = PyList_Size(innerlist); + if (innerlen == -1) + onError ("goto table must be a list of lists!"); + if ((actions[outerct] = (actiontype **) malloc (sizeof(actiontype *) * innerlen)) == NULL) + onError("Memory Error"); + for (innerct = 0; innerct < innerlen; innerct++) { + if ((actions[outerct][innerct] = (actiontype *) malloc(sizeof(actiontype))) == NULL) + 
onError("Memory Error"); + py_tuple = PyList_GetItem(innerlist, innerct); + if (! PyTuple_Check(py_tuple)) + onError("goto table must be a list of list of tuples!"); + tuplelen = PyTuple_Size(py_tuple); + if (tuplelen != 2) + onError("goto table must contain entries of tuples of length 2"); + py_act = PyTuple_GetItem(py_tuple, 0); + py_arg = PyTuple_GetItem(py_tuple, 1); + if ((! PyString_Check(py_act)) || (! PyInt_Check(py_arg))) + onError("goto table's entries must be tuples of type string, int"); + actions[outerct][innerct]->act = (short) *(PyString_AsString(py_act)); + actions[outerct][innerct]->arg = (int) PyInt_AsLong(py_arg); + } + } + parser->act_x = outerlen; + parser->act_y = innerlen; + parser->actiontable = actions; +} + + +static PyObject * show_actions(self, args) + parserobject * self; + PyObject * args; +{ + register int x; + register int y; + for (x=0; x < self->act_x; x++) { + for (y=0; y < self->act_y; y++) { + printf("(%c, %d), ", self->actiontable[x][y]->act, self->actiontable[x][y]->arg); + } + printf ("\n"); + } + Py_INCREF(Py_None); + return Py_None; +} + +/*********************************************************************** + * Parser Type Info and internal routines + ***********************************************************************/ + + +staticforward PyTypeObject ParserType; + +#define is_parserobject(v) ((v)->ob_type == &ParserType) + + +/*********************************************************************** + * Parser Methods + ***********************************************************************/ + +static PyObject * +parser_parse(self, args) + parserobject * self; + PyObject * args; +{ + int tok, curstate, i, tuple_i; + PyObject * pylrval; + PyObject * fargs; + PyObject * fres; + actiontype * act; + PyObject ** pylrvals; + if (! 
PyArg_ParseTuple(args, "iO", &tok, &pylrval)) { + return NULL; + } + Py_XINCREF(pylrval); + add2buf(self->inbuf, tok, pylrval); + if ( self->toksadded < 1) { + self->toksadded++; + return Py_BuildValue("i", 1); + } + if ((stackstate(self->stack) < 0) || (gettoken(self->inbuf) < 0)) + onError("PyLRTableIndexError"); + act = self->actiontable[stackstate(self->stack)][gettoken(self->inbuf)]; + if (act == NULL) { + onError("PyLRTableError, couldn't retrieve action"); + } + if (act->act == SHIFT) { + push(self->stack, gettoken(self->inbuf), act->arg, getpylrval(self->inbuf)); + incbi(self->inbuf); + return Py_BuildValue("i", 1); + } else if (act->act == REDUCE) { + pylrvals = pop(self->stack, self->prodinfo[act->arg - 1]->len); + if (PyErr_Occurred()) { return NULL; } + curstate = stackstate(self->stack); + fargs = PyTuple_New(self->prodinfo[act->arg - 1]->len); + for (i=0; i < self->prodinfo[act->arg - 1]->len ; i++) { + tuple_i = ((self->prodinfo[act->arg -1]->len - i) -1); + PyTuple_SetItem(fargs, tuple_i, pylrvals[i]); + } + fres = PyObject_CallObject(self->prodinfo[act->arg - 1]->func, fargs); + if (PyErr_Occurred()) + return NULL; + Py_XINCREF(fres); + /* Py_DECREF(fargs);*/ + push(self->stack, act->arg, self->gototable[curstate][self->prodinfo[act->arg - 1]->lhsi], fres); + return Py_BuildValue("i", 1); + } else if (act->act == ACCEPT) { + return Py_BuildValue("i", 0); + } else { + PyErr_SetString(PyLRengineError, "SyntaxError while parsing"); + return NULL; + } +} + +static PyObject * +parser_show_stack(self, args) + parserobject * self; + PyObject * args; +{ + if (! PyArg_ParseTuple(args, "")) + return NULL; + show_stack(self->stack); + Py_XINCREF(Py_None); + return Py_None; +} + +static PyObject * +parser_show_inbuf(self, args) + parserobject * self; + PyObject * args; +{ + if (! 
PyArg_ParseTuple(args, "")) + return NULL; + dumpinbuf(self->inbuf); + Py_XINCREF(Py_None); + return Py_None; +} + + +static struct PyMethodDef Parser_methods[] = { + { "parse", parser_parse, 1}, + { "showstack", parser_show_stack, 1}, + { "showbuf", parser_show_inbuf, 1}, + { "showgotos", show_gotos, 1}, + { "showacts", show_actions, 1}, + { NULL, NULL}, /* sentinel */ +}; + +/*********************************************************************** + * Basic type operations for ParserType + ***********************************************************************/ + +static parserobject * +newparserobject (pyprodinfo, pyactions, pygotos, bufchunksize, stackchunksize) + PyObject * pyprodinfo; + PyObject * pyactions; + PyObject * pygotos; + int bufchunksize; + int stackchunksize; +{ + parserobject *p; + p = PyObject_NEW(parserobject, &ParserType); + if (p == NULL) + onError("memory in init obj..."); + p->stack = init_stack(stackchunksize); + p->inbuf = init_inbuf(bufchunksize); + mkgototable(p, pygotos); + mkactiontable(p, pyactions); + p->prodinfo = Py_prodinfo2prodinfo(p, pyprodinfo); + p->toksadded = 0; + if (PyErr_Occurred()) + return NULL; + return p; +} + +static void +parser_dealloc(self) + parserobject *self; +{ + PyMem_DEL(self); +} + +static int +parser_print(self, fp, flags) + parserobject * self; + FILE * fp; + int flags; +{ + fprintf(fp, "\n", self); + return 0; +} + + +static PyObject * +parser_getattr(self, name) + parserobject * self; + char * name; +{ + if (strcmp(name, "state") == 0) + return Py_BuildValue("i", stackstate(self->stack)); + if (strcmp(name, "stacksize") == 0) + return Py_BuildValue("i", (self->stack->si)); + if (strcmp(name, "prodinfo") == 0) + return prodinfo2Py_prodinfo(self->prodinfo, self->prodinfosize); + if (strcmp(name, "__members__") == 0) + return Py_BuildValue("[sss]", "state", "stacksize", "prodinfo"); + else + return Py_FindMethod(Parser_methods, (PyObject *) self, name); +} + + +static PyTypeObject ParserType = { + 
PyObject_HEAD_INIT(&PyType_Type) + 0, + "NewEngine", /* type name */ + sizeof(parserobject), /* basic size */ + 0, /* itemsize */ + (destructor) parser_dealloc, + (printfunc) parser_print, + (getattrfunc) parser_getattr +}; + + +/*********************************************************************** + * Module Logic + ***********************************************************************/ + +static PyObject * +parsernew(self, args) +PyObject* self; +PyObject* args; +{ + PyObject* pyprodlengths = NULL; + PyObject* pyactions = NULL; + PyObject* pygotos = NULL; + PyObject* res = NULL; + int bufchunksize=50; + int stackchunksize=100; + CHECK_MALLOC(pyprodlengths) + CHECK_MALLOC(pyactions) + CHECK_MALLOC(pygotos) + if (!PyArg_ParseTuple(args, "O!O!O!|ii", &PyList_Type, &pyprodlengths, + &PyList_Type, &pyactions, &PyList_Type, &pygotos, + &bufchunksize, &stackchunksize)) + goto finally; + res = (PyObject*) newparserobject(pyprodlengths, pyactions, pygotos, bufchunksize, stackchunksize); +finally: + Py_XDECREF(pyprodlengths); + Py_XDECREF(pyactions); + Py_XDECREF(pygotos); + return res; +} + + +static struct PyMethodDef PyLRengine_methods[] = { + {"NewEngine", (PyCFunction)parsernew}, + {NULL, NULL} +}; + + +void +initPyLRengine() +{ + PyObject *m, *d; + m = Py_InitModule("PyLRengine", PyLRengine_methods); + d = PyModule_GetDict(m); + if (PyErr_Occurred()) + Py_FatalError("can't initialize module PyLRengine"); +} + + + + + + + + + + + + + diff --git a/PyLR/README b/PyLR/README new file mode 100644 index 00000000..bfb5179d --- /dev/null +++ b/PyLR/README @@ -0,0 +1,44 @@ +You must have python 1.5b1 or newer to run PyLR, as it works with the +builtin package support of that version. + +To build: + +1) decide whether you want the PyLRengine module to be a shared library. +If not, comment out the '*shared*' line in Setup. + +2)type make -f Makefile boot; make + +that should build the package. 
+ + +To install: + +If you want to install PyLR in your python distribution, just copy +over the PyLR directory to your site-packages directory. If you want +to save a little space, take a look at the __init__ file doc string in +the top directory and it shows the necessary files (distribution minus +Makefile, sedscript, etc). Also, there is a script (pgen.py) you may want +in /usr/local/bin or something more accessible as executable from your shell. + +There is html documentation in the doc/ directory. + + + +To test: + +pgen.py PyLR/tstpspec tst +diff tst PyLR/Parsers/gram.py + +the only difference should be the date line. + + +Feedback: + +send comments/suggestions/bugreports/contributions to +scott@chronis.icgroup.com + + + +thanks, + +scott diff --git a/PyLR/Setup b/PyLR/Setup new file mode 100644 index 00000000..bbe76a14 --- /dev/null +++ b/PyLR/Setup @@ -0,0 +1,2 @@ +*shared* +PyLRengine PyLRenginemodule.c diff --git a/PyLR/Setup.in b/PyLR/Setup.in new file mode 100644 index 00000000..bbe76a14 --- /dev/null +++ b/PyLR/Setup.in @@ -0,0 +1,2 @@ +*shared* +PyLRengine PyLRenginemodule.c diff --git a/PyLR/__init__.py b/PyLR/__init__.py new file mode 100644 index 00000000..9eda6cda --- /dev/null +++ b/PyLR/__init__.py @@ -0,0 +1,39 @@ +""" +This package has the following modules and characteristics: + +(-) = not done yet +(*) = done +(?) = working on it + +PyLR/ the top level module Language Genration Tools + __init__.py(*) this file + Lexer.py(*) defines the Lexer interface that the parser will use, uses re + Lexers/(?) a package to put lexers for different things + __init__ imports GrammarLex class + GrammarLex.py The module that defines the lexer for grammar specifications + Grammar.py(*) The module for dealing with grammars + PyLRenginemodule.so(*) The engine behind a LR parser (can do SLR, LR, and LALR) + Parser.py (*) A class interface to a parser + Parsers/(?) 
A package for storing Parsers + __init__ imports GrammarParser class + gram.py(*) the definition of the GrammarParser (import into Parsers/ namespace) + pgen.py(*) a script for parser generation + parsertemplate.py the doc string of this module is the template for parser generation + + +""" + + +import Parser,Lexers,Parsers +from Lexer import Lexer,SKIPTOK + + +__version__ = "$Id$" + + + + + + + + diff --git a/PyLR/config.c b/PyLR/config.c new file mode 100644 index 00000000..6e27b73f --- /dev/null +++ b/PyLR/config.c @@ -0,0 +1,75 @@ +/* Generated automatically from /usr/lib/python1.5/config/config.c.in by makesetup. */ +/* -*- C -*- *********************************************** +Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam, +The Netherlands. + + All Rights Reserved + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the names of Stichting Mathematisch +Centrum or CWI or Corporation for National Research Initiatives or +CNRI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +While CWI is the initial source for this software, a modified version +is made available by the Corporation for National Research Initiatives +(CNRI) at the Internet address ftp://ftp.python.org. 
+ +STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH +CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL +DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. + +******************************************************************/ + +/* Module configuration */ + +/* !!! !!! !!! This file is edited by the makesetup script !!! !!! !!! */ + +/* This file contains the table of built-in modules. + See init_builtin() in import.c. */ + +#include "Python.h" + + +extern void initregex(); +extern void initpcre(); +extern void initposix(); +extern void initsignal(); + +/* -- ADDMODULE MARKER 1 -- */ + +extern void PyMarshal_Init(); +extern void initimp(); + +struct _inittab _PyImport_Inittab[] = { + + {"regex", initregex}, + {"pcre", initpcre}, + {"posix", initposix}, + {"signal", initsignal}, + +/* -- ADDMODULE MARKER 2 -- */ + + /* This module "lives in" with marshal.c */ + {"marshal", PyMarshal_Init}, + + /* This lives it with import.c */ + {"imp", initimp}, + + /* These entries are here for sys.builtin_module_names */ + {"__main__", NULL}, + {"__builtin__", NULL}, + {"sys", NULL}, + + /* Sentinel */ + {0, 0} +}; diff --git a/PyLR/doc/PyLR.html b/PyLR/doc/PyLR.html new file mode 100644 index 00000000..957b59c4 --- /dev/null +++ b/PyLR/doc/PyLR.html @@ -0,0 +1,142 @@ + + + PyLR -- Fast LR parsing in python + +
+

PyLR -- Fast LR parsing in python

+
+
+ + +
+

+

What is PyLR?

+ +PyLR is a package of tools for creating efficient parsers in python, +commonly known as a compiler compiler. PyLR is currently under +development. A full release is almost complete, but there are still a few missing +features that would make it much nicer. + 

+PyLR (pronounced 'pillar') was motivated by the frequency with which parsers are hand +coded in python, the performance demands that these parsers are subject to (you just can't beat +native machine code for speed...), and academic curiosity (I wanted to really know how LR +parsing works). 

+ + +

What is the current state of PyLR?

+PyLR currently has class interfaces to a Grammar, a Lexer, an extension module +defining a parsing engine builtin type, and a parser generator script. All of these components +are based on sound parsing theory, but nevertheless haven't been tested by anyone but its author. +The code as it stands can definitely be of use to anyone hand writing a parser in python, but some +of the nicer things in the complete package just haven't been done yet.

+PyLR is therefore under development, as it will always be. PyLR will be given a release number +once it supplies the following tools: +

    + + +
  • write an 'engine' module that implements the LR parsing +algorithm in C with callbacks to python functions. (done)
  • + + +
  • write a Lexer class using re (done)
  • + + +
  • write a Grammar class that will take as input a context +free grammar and produce the parsing tables necessary to complement +the engine. This is to be done with LR(1) grammars (done and then +deleted -- extremely inefficient) and LALR(1) Grammars(done, +except with epsilon (empty) productions, much more efficient).
  • + + +
  • add a user interface -- manually write a lexer and Grammar +using the existing classes to parse lexer and grammar specifications +modelled after lex/flex and yacc/bison. (done for Grammars) +
  • + +
  • write documentation. (usable, but not done) +
  • + +
  • (post release) add grammars to various languages to the + distribution. +
  • +
+In addition, I have the following plan for the project: +
    +
  • make 'epsilon' (empty) productions work (many of them work now, but not all)
  • + +
  • optimize the Lexer. Try to join it into one regular expression and derive + function calls from match object data. (done, still the slowest part of parsing)
  • + +
  • add error specification routines.
  • + +
  • change the parser generation algorithm to use only kernel LALR(1) items + in the computation of shift actions and gotos in the goto table. This + should significantly enhance the rate of parser generation, which is currently + a bit slow, but certainly acceptable for medium-sized grammars (< ~100 productions) + (done!) this version +
  • + + +
  • write a Parser for sql, as used in gadfly +
  • + +
  • add operator precedence as an option to the parser specification (further down the road...)
  • + +
+These things will probably be done over the next month or two (as I only have free time to give +to this project...Ahemmm...). +

+

Where do I get PyLR?

+You can get PyLR in one of two places, here +or here. Both versions will be in sync with each other. +

+ +

What will be added to PyLR?

+In addition to the list of things to finish before a full release, +is published, PyLR could be used as the basis for an efficient datapath analyzer (optimizer), +for a front end to translation from one language to another, for type checking code, etc.

+As soon as the first release is completed, Tools to aid in all these things could well be added +to the package. Also, anyone wanting to contribute parser specifications for +languages of general use is most welcome. +

+ +

Where do I find out more about parsing?

+Parsing was for a long time a big challenge for computer scientists. The need for +computer parsing originally came about with the first writing of compilers. Since then, the +theory behind parsing has been studied in depth and has pretty much stabilized as it no longer +really presents a big problem in terms of speed or size in terms of parsing todays computer +languages. One standard means of parsing that has been used for years because of its efficiency +is LR parsing (more particularly, LALR parsing). A lot of good information is in + +Lex and Yacc , + +The Dragon Book , and +it seems like the only place to find good info on LALR parsing is in + +
+DeRemer, F.; and Pennello, T.Efficient computation of LALR(1) look-ahead sets, ACM Trans.
+ Program. Lang. Syst. 4 (1982), 615-649.
+
+ +Finally, to find out how to use PyLR, see thePyLR manual + +

How do I contribute to PyLR?

+mail me. + + + + + + + + + diff --git a/PyLR/doc/manual.html b/PyLR/doc/manual.html new file mode 100644 index 00000000..ef22da00 --- /dev/null +++ b/PyLR/doc/manual.html @@ -0,0 +1,313 @@ + + PyLR maual + + + +
+

PyLR Manual

+
+ +This is the PyLR parser generator manual. PyLR is a parser generator package for +use with python (version 1.5b1 or better). This manual addresses how to use the +package to produce parsers. +

+

+ +
+

+

+

Audience

+Parsing can be very complicated stuff, and it helps to understand what exactly is +happening when something is parsed when writing a parser. Unfortunately (for the impatient), +the topic of Parsing has been the subject of many a dissertation. This document will present +two views on the data it presents. One is a technical view which will contain terms without +defining them. These terms are generally understood by those who have studied parsing theory +(such as LALR, shift-reduce, etc), and probably not understood by those that haven't. For this +reason, I have attempted to include an intuitive view whenever possible, particularly in the +section The Basics. There should be enough in that section to let anyone +who is interested and familiar with python write a parser. 

+

The Basics


+ +This section refers to writing lexers, Grammars, and then producing a parser with +these parts. In PyLR, a lexer is part of a parser. This simplifies the interface to +actually doing the parsing. There is an 'engine' which takes the output of the lexer and triggers +the back end of parsing. So we'll start with writing a lexer. +
    +
  • +

    Writing a Lexer


    +When some text is to be parsed, it first has to go through lexical analysis. This +process is done with a lexer. PyLR provides a base Lexer class to help write a lexer. +The process isn't hard. A lexer just returns the atomic parts of the text. You define what is +atomic by using regular expressions to match the atomic parts. Each atomic definition +you give is automatically given a token value (an int). When the lexer scans text, it returns +a stream of (token, value) pairs where the the token is the token value that was assigned +to the match definition and the the value is an arbitrary python value (class, string, int, whatever). +The (token, value) pair is then passed to the parser for further processing. +

    + + + Frequently, lexers will return the matched text as the +value in the (token, value) pair. This is the +default when you subclass the provided Lexer class. However, there +are a lot of different things you may want to happen upon finding a +match. For example, sometimes you will want to match something but +not use the match or pass it on to the parser. +

    + + There is a function in the base class that +provides for all these and more options. It is the
    +.addmatch(compiled_regex, tokenname="", function=None, +flags=MAPTOK|EXECFN)
    method. This method requires only a regular +expression as its argument, but in practice, token names should be passed along with +the re. This practice will make your grammar more readable and easier +to write later.
    The function argument, if specified, will make the +lexer execute that function with the resulting match object as it's +one and only argument. The lexer will then return the return value of +the function as the value in the (token, value) pair +it returns. By default, the lexer will just return the token and the associated +matched text. +
    + The flags argument not only defaults to the reasonable MAPTOK|EXECFN, but also adapts to +the values of the other arguments you pass. This way, you don't have to bother with them much. The one +time it's common to use the flags is when you want the lexer to match something but not return anything until +the next match. It is common to have whitespace treated in this fashion. For this option, you use +.addmatch(re.compile(r"\s+"), "", None, Lexer.SKIPTOK). The example below utilizes all these +options. 

    + Finally, please note the call of the .seteof() function at the end of the __init__ +method. This is necessary for all subclassed lexers. The reason it is there is that the token value +of EOF is expected to be one greater than any other token value by the parser. Your lexer will not +work with the parser api without this call. +

    +Example +

    +from PyLR import Lexer
    +import re, string
    +
    +#
    +# this function will handle matches to an integer.  It passes the
    +# integer value to the parser and does the conversion here.
    +#
    +def intfunc(m):
    +    return string.atoi(m.group(0))
    +
    +
    +class mathlex(Lexer.Lexer):
    +
    +    #
    +    # define the atomic parts with regular expressions
    +    # 
    +
    +    INT = re.compile(r"([1-9]([0-9]+)?)|0")  # matches an integer
    +    LPAREN = re.compile(r"\(")              # matches '('
    +    RPAREN = re.compile(r"\)")              # matches ')' 
    +    
    +    TIMES = re.compile(r"\*")               # matches '*'
    +    PLUS = re.compile(r"\+")                # matches '+'
    +    WS = re.compile(r"\s+")                 # matches whitespace
    +
    +
    +    def __init__(self):
    +        #
    +        # initialize with the base class
    + 	#
    +	Lexer.Lexer.__init__(self)
    +	#
    +	# addmatch examples
    +	#
    +	self.addmatch(self.INT, intfunc, "INT")
    +	for p,t in ( (self.PLUS, "PLUS"), (self.TIMES,"TIMES"),
    +		     (self.LPAREN, "LPAREN"), (self.RPAREN, "RPAREN"),):
    +	    self.addmatch(p, None, t)
    +	self.addmatch(self.WS, None, "", Lexer.SKIPTOK)
    +	self.seteof()
    +
    +
    +# create the lexer
    +lexer = mathlex()
    +# test it with the interactivetest method
    +lexer.interactivetest()
    +
    + +
  • +
    +
  • Writing a Grammar


    +The grammar you write is somewhat easier than the lexer. You don't have +to code anything. There is a program in the PyLR distribution called pgen.py +that will take your Grammar specification and produce a parser for you. The parser that is +produced is of the shift-reduce variety of LR parsers and uses LALR(1) items to help produce +the parsing tables. In other words, it uses a parsing algorithm that is quite efficient (implemented +in C) and will handle most modern day programming language constructs without a problem. These +qualities have made this parsing algorithm a very commonly used one in compiler construction since +October 1982. +

    + When you write a grammar, you are specifying a context free grammar in normal form, +with a few addons to help generate the parser in Python. In other words, you specify a series +of productions. For example, to specify a very simple math grammar that will work with the +above lexer, you may state something like this: + +

    +expression: expression PLUS term 
    +     |      term;
    +
    +term: term TIMES factor 
    +  |   factor;
    +
    +factor: LPAREN expression RPAREN 
    +   |    INT;
    +
    + +The identifiers in all uppercase are conventionally terminal symbols. +These will be identified by the lexer and returned to the parser. The identifiers +in all lowercase are the nonterminal symbols. Each nonterminal must appear +on the left somewhere. The corresponding right side may have terminals or non terminals. +You may not have empty (epsilon) right hand sides (yet). +

    +Whenever the parser recognizes a production, it will call a function. You may specify +the name of the method of the parser class to be invoked for a production by adding +a parenthesized name to the right of the production. The above grammar rewritten with +method name specifications looks like this (This part will become more clear after the next step, +stay with it!). + +

    +expression: expression PLUS term (addfunc)
    +     |      term;
    +
    +term: term TIMES factor (timesfunc)
    +  |   factor;
    +
    +factor: LPAREN expression RPAREN (parenfunc)
    +   |    INT;
    +
    + +
  • + +
  • Putting it all together: making the parser


    + When you create a parser, you are creating a class that is intended to act like +a class in library code. That is, it will mostly be used by subclassing that class. +The parser you create will parse what it was intended to, but it won't do anything +with the parse tree unless you subclass it and define some special methods. +

    +Those methods must have the name specified in the grammar you wrote. For example, if you +built a parser for the above grammar, in order for it to actually add things together, +you would have to subclass the class that was produced and then define the methods +addfunc, timesfunc, and parenfunc. When each of these methods is called +it will be passed the values on the right hand side of the corresponding production as arguments. +Those values are either the value returned by the lexer, if the symbol is terminal, or +a value returned by one of these special methods, if the symbol is a nonterminal. +

    +In the above example, since the rest of the productions only have one item, it doesn't really matter +whether or not they have methods, the parser just calls a reasonable default. +

    +As you can see, we've defined most of what is necessary for building a parser. But the above should tell +you that there are a few other things that you may want to define, like the name of the class that +is produced, or what lexer is used with the parser. Describing these things along with a grammar like +the example above is writing a parser specification for PyLR. A reasonable parser specification for the +example we've been following: +

    +_class SimpleMathParser
    +_lex mathlex.mathlex()
    +_code from PyLR.Lexers import mathlex
    +"""
    +expression: expression PLUS term (addfunc)
    +     |      term;
    +
    +term: term TIMES factor (timesfunc)
    +  |   factor;
    +
    +factor: LPAREN expression RPAREN (parenfunc)
    +   |    INT;
    +"""
    +
    +the _class keyword defines the name of the class that the parser will take +the _lex keyword defines the code used to intialize that parser's lexer +the _code keyword defines extra code at the top of the output file. Multiple +instances of this keyword will cause the extra source code (in python) to be accumulated. +the triple quotes delimit the grammar section. +

    +Please note, the above syntax is subject to change as this is an alpha release and I feel +that it can be improved upon. +

    + now you can create a parser. Just use the pgen.py script and it will output +your source code: +

    +pgen.py mathparserspec tst.py
    +chronis 3:34am $ python
    +Python 1.5b1 (#1, Nov 27 1997, 19:51:47)  [GCC 2.7.2] on linux2
    +Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
    +>>> import tst
    +>>> dir(tst)
    +['PyLR', 'SimpleMathParser', '__builtins__', '__doc__', '__file__', '__name__', '_actiontable', '_gototable', '_prodinfo', 'mathlex']
    +>>> print tst.SimpleMathParser.__doc__
    +
    +    this class was produced automatically by the PyLR parser generator.
    +    It is meant to be subclassed to produce a parser for the grammar
    +
    +expression -> expression PLUS term           (addfunc)
    +        | term;                              (unspecified)
    +term -> term TIMES factor                    (timesfunc)
    +        | factor;                            (unspecified)
    +factor -> LPAREN expression RPAREN           (parenfunc)
    +        | INT;                               (unspecified)
    +
    +    While parsing input, if one of the above productions is recognized,
    +    a method of your sub-class (whose name is indicated in parens to the 
    +    right) will be invoked. Names marked 'unspecified' will not be invoked. 
    +    
    +    usage: 
    +
    +class MySimpleMathParser(SimpleMathParser):
    +    # ...define the methods for the productions... 
    +
    +p = MySimpleMathParser(); p.parse(text)
    +    
    +>>> class MP(tst.SimpleMathParser):
    +...     def __init__(self):
    +...             tst.SimpleMathParser.__init__(self)
    +...     def addfunc(self, left, plus, right):
    +...             print "%d + %d" % (left, right)
    +...             return left + right
    +...     def parenfunc(self, lp, expr, rp):
    +...             print "handling parens"
    +...             return expr
    +...     def timesfunc(self, left, times, right):
    +...             print "%d * %d" % (left, right)
    +...             return left * right
    +... 
    +>>> mp = MP()
    +>>> mp.parse("4 * (3 + 2 * 5)")
    +2 * 5
    +3 + 10
    +handling parens
    +4 * 13
    +
    +
    +
    + +
  • +
+ + +

Structure

+Nothing yet, sorry it's an alpha, read the source. + +

API

+Nothing yet, sorry it's an alpha. Read the source. + + diff --git a/PyLR/gramnew.py b/PyLR/gramnew.py new file mode 100644 index 00000000..ed5afdde --- /dev/null +++ b/PyLR/gramnew.py @@ -0,0 +1,170 @@ +""" + out -- created Sun Dec 14 21:41:11 1997 + +This file was automatically generated by the PyLR parser generator. +It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These +tables are used to give functionality to a parsing engine. It also defines +A Parser class called GrammarParser which will use this engine. It's Usage is +indicated in GrammarParser's doc-string. +""" +# +# this section contains source code added by the user +# plus 'import PyLR' +# + +import PyLR.Lexers +import PyLR.Parser +import PyLR + +# +# the action table ('s', 4) means shift to state 4, +# ('r', 4) means reduce by production number 4 +# other entries are errors. each row represents a state +# and each column a terminal lookahead symbol (plus EOF) +# these symbols are ['LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', 'EOF'] +# +_actiontable = [ + [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)], + [('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)], + [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), 
('r', 3)], + [('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)], + [('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)], + [('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)], + [('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)], + [('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)], + [('', -1), ('', -1), 
('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)], + [('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)] +] + + + +# +# the goto table, each row represents a state +# and each column, the nonterminal that was on the lhs of the +# reduction +# +_gototable = [ + [1, 2, 3, 9, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, 4, None, 8, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, 6, 14, None, None, None, None], + [None, None, None, None, None, 13, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, 17, 19, 22, 26], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, 21, 22, 26], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, 
None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None] +] + + + +# +# This is the prodinfo table. each row represents a production +# the entries are the length of the production, the name of a method +# in an instance of the GrammarParser class below that gets called +# when that production occurs, and the index of the lhs in the +# nonterminals (as in # the gototable) +# +_prodinfo = [ + (1, 'unspecified', 0), # pspec -> ['gspec'] + (2, 'unspecified', 0), # pspec -> ['pydefs', 'gspec'] + (3, 'unspecified', 1), # gspec -> ['GDEL', 'lhsdeflist', 'GDEL'] + (2, 'unspecified', 2), # pydefs -> ['pydefs', 'pydef'] + (1, 'unspecified', 2), # pydefs -> ['pydef'] + (1, 'lexdef', 3), # pydef -> ['LEX'] + (1, 'addcode', 3), # pydef -> ['CODE'] + (1, 'classname', 3), # pydef -> ['CLASS'] + (2, 'unspecified', 4), # lhsdeflist -> ['lhsdeflist', 'lhsdef'] + (1, 'unspecified', 4), # lhsdeflist -> ['lhsdef'] + (4, 'lhsdef', 5), # lhsdef -> ['ID', 'COLON', 'rhslist', 'SCOLON'] + (1, 'singletolist', 6), # rhslist -> ['rhs'] + (3, 'rhslist_OR_rhs', 6), # rhslist -> ['rhslist', 'OR', 'rhs'] + (1, 'rhs_idlist', 7), # rhs -> ['rhsidlist'] + (4, 'rhs_idlist_func', 7), # rhs -> ['rhsidlist', 'LPAREN', 'ID', 'RPAREN'] + (1, 'unspecified', 8), # rhsidlist -> ['idlist'] + (0, 'rhseps', 8), # rhsidlist -> [] + (2, 'idl_idlistID', 9), # idlist -> ['idlist', 'ID'] + (1, 'idlistID', 9), # idlist -> ['ID'] + ] + + + + +class GrammarParser (PyLR.Parser.Parser): + """ + this class was produced automatically by the PyLR parser generator. 
+ It is meant to be subclassed to produce a parser for the grammar + +pspec -> gspec (unspecified) + | pydefs gspec; (unspecified) +gspec -> GDEL lhsdeflist GDEL; (unspecified) +pydefs -> pydefs pydef (unspecified) + | pydef; (unspecified) +pydef -> LEX (lexdef) + | CODE (addcode) + | CLASS; (classname) +lhsdeflist -> lhsdeflist lhsdef (unspecified) + | lhsdef; (unspecified) +lhsdef -> ID COLON rhslist SCOLON; (lhsdef) +rhslist -> rhs (singletolist) + | rhslist OR rhs; (rhslist_OR_rhs) +rhs -> rhsidlist (rhs_idlist) + | rhsidlist LPAREN ID RPAREN; (rhs_idlist_func) +rhsidlist -> idlist (unspecified) + | ; (unspecified) +idlist -> idlist ID (idl_idlistID) + | ID; (idlistID) + + While parsing input, if one of the above productions is recognized, + a method of your sub-class (whose name is indicated in parens to the + right) will be invoked. Names marked 'unspecified' should be ignored. + + usage: + +class MyGrammarParser(GrammarParser): + # ...define the methods for the productions... + +p = MyGrammarParser(); p.parse(text) + """ + + def __init__(self): + lexer = PyLR.Lexers.GrammarLex() + PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo) diff --git a/PyLR/parsertemplate.py b/PyLR/parsertemplate.py new file mode 100644 index 00000000..84b53f41 --- /dev/null +++ b/PyLR/parsertemplate.py @@ -0,0 +1,75 @@ +# +# this file's doc string is used as a template for producing PyLRtables.py. +# PyLRtables.py containes the source code to produce the engine part of a +# parser. +# +'''\ +""" + %(filename)s -- created %(date)s + +This file was automatically generated by the PyLR parser generator. +It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These +tables are used to give functionality to a parsing engine. It also defines +A Parser class called %(parsername)s which will use this engine. It's usage +is indicated in %(parsername)s's doc-string. 
+""" +# +# this section contains source code added by the user +# plus 'import PyLR' +# +%(extrasource)s +import PyLR + +# +# the action table +# 's' means shift +# ('r',) means reduce with production n +# 'a' means accept +# '' means error +# each row represents a state and each column a terminal lookahead symbol +# (excluding symbols with Lexer.SKIPTOK of course). +# Lexer symbols are: +# %(symbols)s +# +_actiontable = %(actiontable)s + +# +# the goto table, each row represents a state +# and each column, the nonterminal that was on the lhs of the +# reduction +# +_gototable = %(gototable)s + +# +# This is the prodinfo table. each row represents a production +# the entries are the length of the production, the name of a method +# in an instance of the %(parsername)s class below that gets called +# when that production occurs, and the index of the lhs in the +# nonterminals (as in # the gototable) +# +_prodinfo = %(prodinfo)s + + +class %(parsername)s(PyLR.Parser.Parser): + """ + this class was produced automatically by the PyLR parser generator. + It is meant to be subclassed to produce a parser for the grammar + +%(grammar)s + + While parsing input, if one of the above productions is recognized, + a method of your sub-class (whose name is indicated in parens to the + right) will be invoked. Names marked 'unspecified' should be ignored. + + usage: + +class My%(parsername)s(%(parsername)s): + # ...define the methods for the productions... 
+ +p = My%(parsername)s(); p.parse(text) + """ + def __init__(self): + lexer = %(lexerinit)s + PyLR.Parser.Parser.__init__(self, lexer, _actiontable, _gototable, _prodinfo) +''' + diff --git a/PyLR/pgen.py b/PyLR/pgen.py new file mode 100644 index 00000000..3ada0628 --- /dev/null +++ b/PyLR/pgen.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +import PyLR, PyLR.Grammar, sys, getopt +from PyLR.Parsers import GrammarParser + +class ParserParser(GrammarParser): + def __init__(self): + GrammarParser.__init__(self) + self.result = [] # to be populated with productions + self.funcmap = {} + self.usercode = "" + self.lexdef = "" + self.classname = "MyParser" + self.idlist = [] + + def idlistID(self, id): + "idlist -> id" + self.idlist.append(id) + return [id] + + def singletolist(self, el): + "rhslist -> rhs" + return [el] + + def idl_idlistID(self, l, el): + "idlist -> idlist id" + self.idlist.append(id) + l.append(el) + return l + + def rhs_idlist(self, l): + "rhs -> idlist" + return l + + def rhseps(self): + "rhseps -> " + return [] + + def rhs_idlist_func(self, l, lp, id, rp): + "rhs -> idlist LPAREN ID RPAREN" + self.funcmap[tuple(l)] = id + return l + + def rhslist_OR_rhs(self, l, OR, el): + "rhs -> rhslist OR rhs" + l.append(el) + return l + + def lhsdef(self, lhs, COLON, rhslist, SCOLON): + "lhsdef -> ID COLON rhslist SCOLON" + print lhs + for rhs in rhslist: + self.result.append(PyLR.Grammar.Production(lhs, rhs)) + return None + + def lexdef(self, ld): + self.lexdef = ld + + def addcode(self, code): + self.usercode = self.usercode + "\n" + code + + def classname(self, name): + self.classname = name + + def parse(self, text, outf, verbose=0): + global g, toks, lexer + PyLR.Parser.Parser.parse(self, text, verbose) + # insert the functionnames + for p in self.result: + funcname = self.funcmap.get(tuple(p.RHS), "unspecified") + p.setfuncname(funcname) + #evaluate the lexer + exec(self.usercode) + lexer = eval(self.lexdef) + + # generate the tokens for grammar + toks = 
lexer.getTokenList() + # change the symbols to their numbers + for p in self.result: + for si in range(len(p.RHS)): + if p.RHS[si] in toks: + p.RHS[si] = toks.indexof(p.RHS[si]) + + g = PyLR.Grammar.LALRGrammar(self.result, toks) + print g + g.extrasource = self.usercode + print "done parsing, about to start parser generation (writing to %s)" % outf + if self.lexdef: + g.writefile(outf, self.classname, self.lexdef) + else: + g.writefile(outf, self.classname) + print "done" + + +def main(): + usage = "pgen.py infile outfile" + args = sys.argv[1:] + if len(args) != 2: + print usage + sys.exit(0) + inf = args[0] + outf = args[1] + if inf == "-": + f = sys.stdin + else: + f = open(inf) + pspec = f.read() +# f.close() # dont close stdin + global pp # for use with python -i pgen.py + pp = ParserParser() + verbose=1 + pp.parse(pspec, outf, verbose) + + +if __name__ == "__main__": + main() + + diff --git a/PyLR/sedscript b/PyLR/sedscript new file mode 100644 index 00000000..cf7e0ec5 --- /dev/null +++ b/PyLR/sedscript @@ -0,0 +1,28 @@ +1i\ +# Generated automatically from Makefile.pre.in by sedscript. 
+s%@VERSION[@]%1.5% +s%#@SET_CCC[@]%CCC=g++% +s%@CC[@]%gcc% +s%@RANLIB[@]%ranlib% +s%@OPT[@]%-g -O2% +s%@LDFLAGS[@]%% +s%@DEFS[@]%-DHAVE_CONFIG_H% +s%@LIBS[@]%-lieee -ldl -lpthread% +s%@LIBM[@]%-lm% +s%@LIBC[@]%% +s%@MACHDEP[@]%linux2% +s%^prefix=.*%prefix= /usr% +s%^exec_prefix=.*%exec_prefix= ${prefix}% +s%@SO[@]%.so% +s%@LDSHARED[@]%gcc -shared -lc% +s%@CCSHARED[@]%-fPIC% +s%@LINKFORSHARED[@]%-Xlinker -export-dynamic% +s%@LINKCC[@]%$(CC)% +/^#@SET_CCC@/d +/^installdir=/s%=.*%= /usr% +/^exec_installdir=/s%=.*%=/usr% +/^srcdir=/s%=.*%= .% +/^VPATH=/s%=.*%= .% +/^LINKPATH=/s%=.*%= % +/^BASELIB=/s%=.*%= % +/^BASESETUP=/s%=.*%= % diff --git a/PyLR/tstpspec b/PyLR/tstpspec new file mode 100644 index 00000000..898219c8 --- /dev/null +++ b/PyLR/tstpspec @@ -0,0 +1,51 @@ +# +# this is a Grammar Spec for parsing PyLR style +# Grammars +# + +# +# this is the pydefs section, where you name the output class +# , add code, state how to initialize the lexer +# +_class GrammarParser +_code import PyLR.Lexers +_code import PyLR.Parser +_lex PyLR.Lexers.GrammarLex() + +# +# this is the Grammar spec part, where you specify +# the productions and optionally their corresponding +# method names in the generated Parser class (or subclasses +# of it) +# +""" +pspec: gspec | + pydefs gspec; + +gspec: GDEL lhsdeflist GDEL; + +pydefs: pydefs pydef | + pydef; + +pydef: LEX (lexdef) | + CODE (addcode) | + CLASS (classname); + +lhsdeflist: lhsdeflist lhsdef | + lhsdef; + +lhsdef: ID COLON rhslist SCOLON (lhsdef); + +rhslist: rhs (singletolist) | + rhslist OR rhs (rhslist_OR_rhs); + +rhs: rhsidlist (rhs_idlist) | + rhsidlist LPAREN ID RPAREN (rhs_idlist_func); + +rhsidlist: idlist + | (rhseps); + + +idlist: idlist ID (idl_idlistID) | + ID (idlistID); +""" diff --git a/README b/README new file mode 100644 index 00000000..f62948a0 --- /dev/null +++ b/README @@ -0,0 +1,25 @@ + LinkChecker + ============= + +With LinkChecker you can check your HTML documents for broken links. 
+Features: +o recursive checking +o multithreaded +o output can be colored or normal text, HTML, SQL or a GML sitemap graph +o HTTP, FTP, mailto:, Gopher, Telnet and local file links are supported + Javascript and HTTPS links are currently ignored +o restrict link checking to your local domain +o HTTP proxy support +o give username/password for HTTP and FTP authorization +o robots.txt exclusion protocol support + +LinkChecker is licensed under the GNU Public License. +Credits go to Guido van Rossum for making Python. His hovercraft is +full of eels! +As this program is directly derived from my Java link checker, additional +credits go to Robert Forsman (the author of JCheckLinks) and his +robots.txt parse algorithm. + +I want to thank everybody who gave me feedback, bug reports and +suggestions. + diff --git a/README.dns b/README.dns new file mode 100644 index 00000000..98a793b7 --- /dev/null +++ b/README.dns @@ -0,0 +1,84 @@ +Release 2.2, Mon Apr 27 22:59:16 EST 1998 + +This is a test release of the DNS code, as originally written by +Guido van Rossum, and with a hopefully nicer API bolted over the +top of it by Anthony Baxter . It's also in a +python 1.5 package. + +There are several known bugs/unfinished bits + +- processing of AXFR results is not done yet. +- something I've done recently has broken the DnsAsyncRequest(). Bummer. +- doesn't do IPv6 DNS requests (type AAAA) (as per [RFC 1886]) +- docs, aside from this file +- all sorts of other stuff that I've probably forgotten. + +Stuff it _does_ do: + processes /etc/resolv.conf - at least as far as nameserver directives go. + tries multiple nameservers. + nicer API - see below. + returns results in more useful format. + optional timing of requests. + default 'show' behaviour emulates 'dig' pretty closely. + support for asyncore.py ### NOTE: currently broken a bit. 
+ + +To use: + +import DNS +reqobj=DNS.Request(args) +reqobj.req(args) + +args can be a name, in which case it takes that as the query, and/or a series +of keyword/value args. (see below for a list of args) + +when calling the 'req()' method, it reuses the options specified in the +DNS.Request() call as defaults. + +options are applied in the following order: + those specified in the req() call + or, if not specified there, + those specified in the creation of the Request() object + or, if not specified there, + those specified in the DNS.defaults dictionary + +name servers can be specified in the following ways: + by calling DNS.ParseResolvConf(), which will load the DNS.servers + from the system's /etc/resolv.conf file + by specifying it as an option to the request + by manually setting DNS.defaults['server'] to a list of server IP + addresses to try + XXXX It should be possible to load the DNS servers on a windows or + mac box, from where-ever they've squirrelled them away + +name="host.do.main" # the object being looked up +qtype="SOA" # the query type, eg SOA, A, MX, CNAME, ANY +protocol="udp" # "udp" or "tcp" - usually you want "udp" +server="nameserver" # the name of the nameserver. Note that you might + # want to use an IP address here +rd=1 # "recursion desired" - defaults to 1. +other: opcode, port, ... + +There's also some convenience functions, for the lazy: + +to do a reverse lookup: +>>> print DNS.revlookup("192.189.54.17") +yarrina.connect.com.au + +to look up all MX records for an entry: +>>> print DNS.mxlookup("connect.com.au") +[(10, 'yarrina.connect.com.au'), (100, 'warrane.connect.com.au')] + +Documentation of the rest of the interface will have to wait for a +later date. Note that the DnsAsyncRequest stuff is currently not +working - I haven't looked too closely at why, yet. + +There's some examples in the tests/ directory - including test5.py, +which is even vaguely useful. 
It looks for the SOA for a domain, checks +that the primary NS is authoritative, then checks the nameservers +that it believes are NSs for the domain and checks that they're +authoritative, and that the zone serial numbers match. + +see also README.guido for the original docs. + +comments to me - arb@connect.com.au diff --git a/StringUtil.py b/StringUtil.py new file mode 100644 index 00000000..1c32c2f8 --- /dev/null +++ b/StringUtil.py @@ -0,0 +1,136 @@ +import string,re + +HtmlTable = [ + ("ä","ä"), + ("ö","ö"), + ("ü","ü"), + ("Ä","Ä"), + ("Ö","Ö"), + ("Ü","Ü"), + ("ß","ß"), + ("&","&"), + ("<","<"), + (">",">"), + ("é","é"), + ("è","è") + ] + +SQLTable = [ + ("'","''") +] + +def stripHtmlComments(data): + i = string.find(data, "", i) + if j == -1: + break + data = data[:i] + data[j+3:] + i = string.find(data, " + + + \ No newline at end of file diff --git a/test/base2.html b/test/base2.html new file mode 100644 index 00000000..4400f784 --- /dev/null +++ b/test/base2.html @@ -0,0 +1,3 @@ + + + diff --git a/test/base3.html b/test/base3.html new file mode 100644 index 00000000..0f6ae6e2 --- /dev/null +++ b/test/base3.html @@ -0,0 +1,2 @@ + + diff --git a/test/frames.html b/test/frames.html new file mode 100644 index 00000000..b4f8248d --- /dev/null +++ b/test/frames.html @@ -0,0 +1,4 @@ + + + + diff --git a/test/test1.html b/test/test1.html new file mode 100644 index 00000000..88f7ac06 --- /dev/null +++ b/test/test1.html @@ -0,0 +1,17 @@ +Just some HTTP links + + + + + + + + + + + + + + + + + + + + + + + + + + + +> + + + + + + + + + + diff --git a/tests/test.py b/tests/test.py new file mode 100755 index 00000000..6592299b --- /dev/null +++ b/tests/test.py @@ -0,0 +1,22 @@ +#!/opt/python/bin/python1.5 + +import DNS +# automatically load nameserver(s) from /etc/resolv.conf +# (works on unix - on others, YMMV) +DNS.ParseResolvConf() + +# lets do an all-in-one request +# set up the request object +r = DNS.DnsRequest(name='munnari.oz.au',qtype='A') +# do the request 
#!/usr/bin/env python
#
# tests/test3.py -- look up the A records for a host served by several
# addresses (round-robin DNS) and print the address data of each answer.
#

import DNS

# automatically load nameserver(s) from /etc/resolv.conf
# (works on unix - on others, YMMV)
DNS.ParseResolvConf()

# BUG FIX: the original script called r.req(...) without ever creating r;
# build the reusable request object first (same style as test2.py).
r = DNS.Request(qtype='A')

# web server reliability, the NT way. *snigger*
res = r.req('www.microsoft.com', qtype='A')
# res.answers is a list of dictionaries of answers
print("%d different A records" % len(res.answers))
# each of these has an entry for 'data', which is the result.
print(list(map(lambda x: x['data'], res.answers)))
print " NS has serial",serial[1] + +if __name__ == "__main__": + main()