Grammar work

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@12 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-02-27 13:46:00 +00:00
parent f013566b4d
commit 55ce8fc0f8
13 changed files with 383 additions and 197 deletions

View file

@ -1,6 +1,6 @@
__version__ = "$Id$"
import time,string,types,parsertemplate,Lexer,Set
import sys,time,string,types,parsertemplate,Lexer,Set
class ParseError(SyntaxError):
pass
@ -45,21 +45,19 @@ class Grammar:
EPS = -1
DummyLA = -2
def __init__(self, productions, tokens=[], verbose=0):
if not productions:
raise ParseError, "empty production list"
def __init__(self, productions, tokens, verbose=0):
if not (productions or tokens):
raise ParseError, "empty production or token list"
self.verbose = verbose
self.productions = productions
self.tokens = tokens
self.terminals = []
self.nonterminals = []
self.terminals = range(len(tokens))
self.nonterminals = map(lambda p: p.lhs, self.productions)
for p in self.productions:
if p.lhs not in self.nonterminals:
self.nonterminals.append(p.lhs)
for s in p.rhs:
if type(s) == types.IntType and s not in self.terminals:
self.terminals.append(s)
self.terminals.sort()
if (s not in self.terminals) and (s not in self.nonterminals):
raise ParseError, "invalid symbol "+`s`+\
" in production '"+`p`+"'"
if self.verbose:
print "Terminals:", self.terminals
print "Nonterminals:", self.nonterminals
@ -127,7 +125,7 @@ class Grammar:
raise ParseError, "start symbol of grammar is not productive"
# reachable nonterminals
reachable_nts = [self.productions[0]]
reachable_nts = [self.productions[0].lhs] # start symbol is reachable
added=1
while added:
added = 0
@ -140,7 +138,7 @@ class Grammar:
# reduce the grammar
self.productions = filter(lambda p, pnt=productive_nts,
rnt=reachable_nts: p.lhs in pnt or p.lhs in rnt,
rnt=reachable_nts: (p.lhs in pnt) and (p.lhs in rnt),
self.productions)
if self.verbose:
print "Reduced grammar:\n"+`self`
@ -170,40 +168,40 @@ class Grammar:
def _genFIRSTmap(self):
"""return dictionary d with d[A] = FIRST(A) for all symbols A
"""
self.FIRSTmap = {}
for sym in [Grammar.EPS]+self.terminals:
self.FIRSTmap[sym] = {sym: 1}
self.firstmap = {}
for sym in [Grammar.EPS, Grammar.DummyLA]+self.terminals:
self.firstmap[sym] = {sym: 1}
added=1
while added:
added = 0
for nt in self.nonterminals:
firsts = self.FIRSTmap.get(nt, {})
firsts = self.firstmap.get(nt, {})
for p in self.lhsprods[nt]:
if not p.rhs:
if not firsts.has_key(Grammar.EPS):
added = firsts[Grammar.EPS] = 1
for Y in p.rhs:
f = self.FIRSTmap.get(Y, {})
f = self.firstmap.get(Y, {})
for a in f.keys():
if not firsts.has_key(a):
added = firsts[a] = 1
if not Y in self.lhsdereps:
break
self.FIRSTmap[nt] = firsts
for s in self.FIRSTmap.keys():
self.FIRSTmap[s] = self.FIRSTmap[s].keys()
self.firstmap[nt] = firsts
for s in self.firstmap.keys():
self.firstmap[s] = self.firstmap[s].keys()
def FIRST(self, gs_list):
"""extend FIRST to set of symbols
precondition: we already have calculated FIRST for all single
symbols and stored the values in self.FIRSTmap
symbols and stored the values in self.firstmap
"""
assert gs_list, "list must be nonempty"
res = {}
allhaveeps=1
for X in gs_list:
set = self.FIRSTmap[X]
set = self.firstmap[X]
for s in set:
res[s] = 1
if not Grammar.EPS in set:
@ -222,6 +220,7 @@ class Grammar:
newsym = newsym+"'"
self.productions.insert(0, Production(newsym,
[self.productions[0].lhs]))
self.lhsprods[newsym] = self.productions[0]
def _genFOLLOWmap(self):
"""dictionary d with d[A] = FOLLOW(A) for all nonterminals A
@ -237,7 +236,7 @@ class Grammar:
for i in range(1,len(p.rhs)):
B = p.rhs[i-1]
beta = p.rhs[i]
for f in self.FIRSTmap[beta]:
for f in self.firstmap[beta]:
if f != Grammar.EPS and f not in self.FOLLOWmap[B]:
self.FOLLOWmap[B].append(f)
added=1
@ -260,16 +259,16 @@ class Grammar:
res = {}
for item in items.keys():
res[item]=0
more = []
more = 1
while more:
more = []
for prodind, rhsind, term in res.keys():
if rhsind >= len(self.productions[prodind].rhs):
continue
prod = self.productions[prodind]
for p in self.lhsprods.get(self.prod.rhs[rhsind], []):
if rhsind >= len(prod.rhs):
continue
for p in self.lhsprods.get(prod.rhs[rhsind], []):
try:
newpart = self.prod.rhs[rhsind + 1]
newpart = prod.rhs[rhsind + 1]
except IndexError:
newpart = Grammar.EPS
for t in self.FIRST([newpart, term]):
@ -284,7 +283,7 @@ class Grammar:
res[item]=0
return res
def _prodinfotable(self):
def prodinfotable(self):
"""returns a list of three pieces of info for each production.
The first is the length of the production, the second is the
function name associated with the production and the third is
@ -368,71 +367,71 @@ class Grammar:
return res
def _lookaheads(self, itemset):
setsofitems = kernels = self.kernelitems
spontaneous = []
propagates = {}
gotomap = {}
for kpi, kri in itemset:
propagates[(kpi,kri)] = []
C = self._closure({(kpi, kri, Grammar.DummyLA):0})
for cpi, cri, t in C.keys():
if cri == len(self.productions[cpi].rhs):
continue
s = self.productions[cpi].rhs[cri]
if gotomap.has_key(s):
newstate = gotomap[s]
X = self.productions[cpi].rhs[cri]
if gotomap.has_key(X):
newstate = gotomap[X]
else:
newstate = setsofitems.index(self._goto(itemset, s))
gotomap[s] = newstate
gotomap[X] = newstate = self.kernelitems.index(\
self._goto(itemset, X))
if t != Grammar.DummyLA:
spontaneous.append((newstate, cpi, cri+1, t))
spontaneous.append((newstate, (cpi, cri+1), t))
else:
if propagates.has_key((kpi, kri)):
propagates[(kpi, kri)].append((newstate, cpi, cri+1))
else:
propagates[(kpi, kri)]=[(newstate, cpi, cri+1)]
propagates[(kpi, kri)].append((newstate, (cpi, cri+1)))
return spontaneous, propagates
def _genKernelitems(self):
self.kernelitems = todo = [[(0, 0)]]
newtodo = 0
newtodo = 1
while newtodo:
newtodo = []
for items in todo:
for s in self.terminals + self.nonterminals:
for s in self.nonterminals + self.terminals:
g = self._goto(items, s)
if g and g not in self.kernelitems:
newtodo.append(g)
if self.verbose:
print "found %d more kernels" % (len(newtodo))
self.kernelitems = self.kernelitems + newtodo
todo = newtodo
self.kernelitems.sort()
self.kernelitems = self.kernelitems + newtodo
todo = newtodo
if self.verbose:
print "generated kernelitems:",self.kernelitems
def _initLALR1items(self):
self._genKernelitems()
if self.verbose:
print "initializing lookahead table..."
props = {}
la_table = []
for i in range(len(self.kernelitems)):
la_table.append([])
for y in range(len(self.kernelitems[i])):
la_table[i].append([])
la_table[0][0] = [0] # EOF
if self.verbose:
print "calculating propagations and spontaneous lookaheads"
state_i = 0
for itemset in self.kernelitems:
if self.verbose:
print ".",
sp, pr = self._lookaheads(itemset)
for ns, pi, ri, t in sp:
inner = self.kernelitems[ns].index((pi, ri))
la_table[ns][inner].append(t)
props[state_i] = pr
state_i = state_i + 1
la_table.append([])
for item in itemset:
la_table[-1].append([])
la_table[0][0].append(0) # EOF
for i in range(len(self.kernelitems)):
sp, pr = self._lookaheads(self.kernelitems[i])
for ns, item, t in sp:
inner = self.kernelitems[ns].index(item)
if t not in la_table[ns][inner]:
la_table[ns][inner].append(t)
props[i] = pr
if self.verbose:
print "Lookahead table:",la_table
print "Propagations:",props
return la_table, props
def _genLALR1items(self):
la_table, props = self._initLALR1items()
if self.verbose:
print "calculating lookahead table..."
added_la=1
while added_la:
added_la = 0
@ -457,10 +456,12 @@ class Grammar:
la_table[pstate][inner].append(pt)
state_i = state_i + 1
if self.verbose:
print "Lookahead table:",la_table
# this section just reorganizes the above data
# to the state it's used in later...
if self.verbose:
print "done with lalr1items, reorganizing the data"
print "reorganizing the data..."
self.LALRitems = []
state_i = 0
for state in self.kernelitems:
@ -474,29 +475,33 @@ class Grammar:
inner.sort()
self.LALRitems.append(inner)
state_i = state_i + 1
if self.verbose:
print "LALR items:",self.LALRitems
def actiontable(self):
items = self.LALRitems
res = []
state_i = 0
terms = self.terminals[:]
terms = self.terminals[:]
terms.append(Grammar.EPS)
errentry = ("", -1)
for state in items:
list = [errentry] * len(terms)
res.append(list)
for prodind, rhsind, term in state:
for state in self.LALRitems:
res.append([errentry] * len(terms))
for (prodind, rhsind), term in state:
if rhsind == len(self.productions[prodind].rhs):
if prodind != 0:
new = ("r", prodind)
old = res[state_i][terms.index(term)]
if old != errentry and old != new:
print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new
print "Conflict[%d,%d]:" % (state_i,
terms.index(term)), old, "->", new
res[state_i][terms.index(term)] = new
else:
new = ("a", -1)
old = res[state_i][terms.index(term)]
if old != errentry and old != new:
print "Conflict[%d,%d]:" % (state_i, terms.index(term)), old, "->", new
print "Conflict[%d,%d]:" % (state_i,
terms.index(term)), old, "->", new
res[state_i][terms.index(term)] = new
# calculate reduction by epsilon productions
@ -505,22 +510,23 @@ class Grammar:
ntfirst = self.firstmap[nt]
ntfirsts = self.ntfirstmap.get(nt, {})
for k in ntfirsts.keys():
if self.lhseps.get(k, ""):
if k in self.lhseps:
reduceterms = self.followmap[k]
# print `((prodind, rhsind), term)`, reduceterms
print `((prodind, rhsind), term)`, reduceterms
for r in reduceterms:
inner = terms.index(r)
old = res[state_i][inner]
new = ("r", self.lhseps[k])
if old != errentry and old != new:
print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
print "Conflict[%d,%d]:" % (state_i,
inner), old, "->", new
res[state_i][inner] = new
# calculate the shifts that occur but whose normal items aren't in the kernel
tfirsts = self.tfirstmap[nt]
for t in tfirsts:
inner = terms.index(t)
g = self.goto(self.kernelitems[state_i], t)
g = self._goto(self.kernelitems[state_i], t)
old = res[state_i][inner]
try:
news = self.kernelitems.index(g)
@ -528,20 +534,22 @@ class Grammar:
continue
new = ("s", news)
if old != errentry and old != new:
print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
print "Conflict[%d,%d]:" % (state_i,
inner), old, "->", new
res[state_i][inner] = new
# compute the rest of the shifts that occur 'normally' in the kernel
else:
t = self.productions[prodind].rhs[rhsind]
inner = self.terminals.index(t)
gt = self.goto(self.kernelitems[state_i], t)
inner = terms.index(t)
gt = self._goto(self.kernelitems[state_i], t)
if gt in self.kernelitems:
news = self.kernelitems.index(gt)
old = res[state_i][inner]
new = ("s", news)
if old != errentry and old != new:
print "Conflict[%d,%d]:" % (state_i, inner), old, "->", new
print "Conflict[%d,%d]:" % (state_i,
inner), old, "->", new
res[state_i][inner] = new
state_i = state_i + 1
return res
@ -568,6 +576,7 @@ class Grammar:
self._genLALR1items()
at = self.actiontable()
gt = self.gototable()
self.productions = self.productions[1:]
pi = self.prodinfotable()
template = parsertemplate.__doc__
vals = {"parsername": parsername, "lexerinit": lexerinit}
@ -643,6 +652,27 @@ def _bootstrap():
# produce the parser
g.writefile("./Parsers/GrammarParser.py", "GrammarParser", "PyLR.Lexers.GrammarLex()")
if __name__=='__main__':
_bootstrap()
def _test():
    """Self-test: check that a non-productive grammar is rejected, then
    generate the MathParser module from a small arithmetic grammar.

    The generated parser is written to Parsers/MathParser.py.
    """
    # first a non-productive Grammar: constructing it has to raise ParseError
    try:
        Grammar([Production("S", ["S"])], ["EOF"])
    except ParseError:
        pass
    else:
        assert 0, "Bummer!"
    # now a simple Grammar; terminals are given as indices into the
    # lexer's token list
    import Lexers
    toks = Lexers.MathLex().getTokenList()
    plus = toks.index("PLUS")
    times = toks.index("TIMES")
    lpar = toks.index("LPAR")
    rpar = toks.index("RPAR")
    integer = toks.index("INT")
    specs = [
        ("expression", ["expression", plus, "term"], "addfunc"),
        ("expression", ["term"]),
        ("term", ["term", times, "factor"], "timesfunc"),
        ("term", ["factor"]),
        ("factor", [lpar, "expression", rpar], "parenfunc"),
        ("factor", [integer]),
    ]
    # third argument 1 switches on the Grammar's verbose output
    math_grammar = Grammar(map(_makeprod, specs), toks, 1)
    math_grammar.writefile("Parsers/MathParser.py", "MathParser",
                           "PyLR.Lexers.MathLex()")
if __name__=='__main__':
# _bootstrap()
_test()

View file

@ -46,7 +46,7 @@ class Lexer:
def scan(self, verbose=0):
if self.textindex >= len(self.text):
if verbose: print "EOF"
if verbose: print "tok=0, val=EOF"
return (0, "EOF")
for i in self.irange:
tok = self.toklist[i]
@ -61,7 +61,7 @@ class Lexer:
val = apply(tok[2], (mo,))
else:
val = mo.group(0)
if verbose: print str(i)+", "+str(val)
if verbose: print "tok="+`i`+", val="+`val`
return (i, val)
raise PyLRSyntaxError, "line "+\
`StringUtil.getLineNumber(self.text, self.textindex)`+\

View file

@ -4,21 +4,21 @@ this file contains the Lexer that is used in parsing Grammar specifications
import re,PyLR
def retlex(mo):
def _retlex(mo):
return mo.group("lex")
def retcode(mo):
def _retcode(mo):
return mo.group("code")
def retclass(mo):
def _retclass(mo):
return mo.group("class")
class GrammarLex(PyLR.Lexer):
def __init__(self):
PyLR.Lexer.__init__(self)
self.addpat(r"_lex\s+(?P<lex>[^\n]*)", "LEX", retlex)
self.addpat(r"_code\s+(?P<code>[^\n]*)", "CODE", retcode)
self.addpat(r"_class\s+(?P<class>[a-zA-Z_][a-zA-Z_0-9]*)", "CLASS", retclass)
self.addpat(r"_lex\s+(?P<lex>[^\n]*)", "LEX", _retlex)
self.addpat(r"_code\s+(?P<code>[^\n]*)", "CODE", _retcode)
self.addpat(r"_class\s+(?P<class>[a-zA-Z_][a-zA-Z_0-9]*)", "CLASS", _retclass)
self.addpat(r"[a-zA-Z_][a-zA-Z_0-9]*", "ID")
self.addpat(r":", "COLON")
self.addpat(r";", "SCOLON")

14
PyLR/Lexers/MathLex.py Normal file
View file

@ -0,0 +1,14 @@
import re, PyLR
def _intfunc(m):
return int(m.group(0))
class MathLex(PyLR.Lexer):
    """Lexer producing the tokens INT, PLUS, TIMES, LPAR, RPAR and WS."""

    def __init__(self):
        PyLR.Lexer.__init__(self)
        # Each tuple is handed to addpat() unchanged, in this order.
        patterns = (
            (r"([1-9]([0-9]+)?)|0", "INT", _intfunc),
            (r"\+", "PLUS"),
            (r"\*", "TIMES"),
            (r"\(", "LPAR"),
            (r"\)", "RPAR"),
            # NOTE(review): the trailing 1 presumably marks whitespace as
            # a token to skip -- confirm against Lexer.addpat
            (r"\s+", "WS", None, 1),
        )
        for pattern in patterns:
            self.addpat(*pattern)

View file

@ -1 +1,2 @@
from GrammarLex import GrammarLex
from MathLex import MathLex

View file

@ -1,15 +0,0 @@
import Lexer, re, string
def idfunc(m):
return int(m.group(0))
class mathlex(Lexer.Lexer):
def __init__(self):
Lexer.Lexer.__init__(self)
self.addpat(r"([1-9]([0-9]+)?)|0", "ID", idfunc)
self.addpat(r"\+", "PLUS")
self.addpat(r"\*","TIMES")
self.addpat(r"\(", "LPAREN")
self.addpat(r"\)", "RPAREN")
self.addpat(r"\s+", "", None, Lexer.SKIPTOK)

19
PyLR/MyMathParser.py Normal file
View file

@ -0,0 +1,19 @@
import PyLR
class MyMathParser(PyLR.Parsers.MathParser):
    """MathParser subclass providing the addfunc, parenfunc and timesfunc
    methods; each prints a trace line and returns a value.
    """

    def addfunc(self, left, plus, right):
        """Print the addition being performed and return left + right."""
        print("%d + %d" % (left, right))
        total = left + right
        return total

    def parenfunc(self, lp, expr, rp):
        """Print a trace line and return the inner expression unchanged."""
        print("handling parens")
        return expr

    def timesfunc(self, left, times, right):
        """Print the multiplication being performed and return left * right."""
        print("%d * %d" % (left, right))
        product = left * right
        return product
def _test():
    """Run MyMathParser over a sample arithmetic expression."""
    # NOTE(review): the second argument 1 is presumably the verbose flag
    # of parse() -- confirm against Parser.parse
    MyMathParser().parse("4 * (3 + 2 * 5)", 1)
if __name__=='__main__':
_test()

View file

@ -1,6 +1,6 @@
__version__ = "$Id$"
class LRParser:
class Parser:
def __init__(self, lexer, actiontable, gototable, prodinfo):
self.lexer = lexer
self.actions = actiontable
@ -28,20 +28,24 @@ class LRParser:
tok, val = self.lexer.scan(verbose)
state = stack[-1]
action = self.actions[state][tok]
if verbose:
print "action",action
if action[0]=='s':
# push the symbol and the state
stack = stack + [tok, action[1]]
elif action[0]=='r':
P = self.prodinfo[action[1]]
P = self.prodinfo[action[1]-1]
# reduce P=A->b by popping 2*|b| from the stack
stack = stack[:-2*P[0]]
goto = self.gotos[stack[-1]][P[2]]
# push A and the goto symbol
stack = stack + [P[2], self.gotos[stack[-1][P[2]]]]
stack = stack + [P[2], goto]
if verbose:
print "reduce",P
P[1](tok, val)
elif action[0]=='a':
return
else:
self.onError()
print "error"
return

View file

@ -1,5 +1,5 @@
"""
./Parsers/GrammarParser.py -- created Wed Feb 23 15:23:44 2000
./Parsers/GrammarParser.py -- created Sun Feb 27 11:42:48 2000
This file was automatically generated by the PyLR parser generator.
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
@ -15,61 +15,71 @@ is indicated in GrammarParser's doc-string.
import PyLR
#
# the action table ('s', 4) means shift to state 4,
# ('r', 4) means reduce by production number 4
# other entries are errors. each row represents a state
# and each column a terminal lookahead symbol (excluding symbols with
# Lexer.SKIPTOK).
# the action table means
# ('s', <n>) shift and go to state n
# ('r', <n>) reduce with production n
# ('a', -1) accept
# ('', -1) error
# each row represents a state and each column a terminal lookahead symbol
# (excluding symbols with Lexer.SKIPTOK of course).
# Lexer symbols are:
# ['EOF', 'LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL', '', '']
# ['EOF', 'LEX', 'CODE', 'CLASS', 'ID', 'COLON', 'SCOLON', 'OR', 'LPAREN', 'RPAREN', 'GDEL']
#
_actiontable = [
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('a', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 1)],
[('s', 10), ('s', 11), ('s', 12), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 5), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 2)],
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 15), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 7), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 3)],
[('r', 4), ('r', 4), ('r', 4), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 4), ('', -1)],
[('r', 5), ('r', 5), ('r', 5), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 5), ('', -1)],
[('r', 6), ('r', 6), ('r', 6), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 6), ('', -1)],
[('r', 7), ('r', 7), ('r', 7), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 7), ('', -1)],
[('r', 8), ('r', 8), ('r', 8), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 8), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 9), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 9), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 10), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 10), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('s', 16), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 18), ('s', 20), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 11), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 11), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 12), ('r', 12), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 28), ('', -1), ('r', 17), ('r', 17), ('r', 17), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 13), ('r', 13), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 14), ('r', 14), ('s', 23), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 24), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 25), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('r', 15), ('r', 15), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('s', 27), ('', -1), ('r', 16), ('r', 16), ('r', 16), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 18), ('', -1), ('r', 18), ('r', 18), ('r', 18), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('r', 19), ('', -1), ('r', 19), ('r', 19), ('r', 19), ('', -1), ('', -1), ('', -1)]
[('', -1), ('s', 1), ('s', 2), ('s', 3), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('s', 4)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('a', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)]
]
#
# the goto table, each row represents a state
# and each column, the nonterminal that was on the lhs of the
# and each column the nonterminal that was on the lhs of the
# reduction
#
_gototable = [
[1, 2, 3, 9, None, None, None, None, None, None],
[5, 6, 7, 8, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, 4, None, 8, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, 6, 14, None, None, None, None],
[None, None, None, None, None, 13, None, None, None, None],
[None, None, None, None, 10, 11, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, 12, None, 13, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, 16, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, 18, 19, 20, 21],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
@ -78,15 +88,7 @@ _gototable = [
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, 17, 19, 22, 26],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, 21, 22, 26],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, 26, 20, 21],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
@ -101,7 +103,7 @@ _gototable = [
# the entries are the length of the production, the name of a method
# in an instance of the GrammarParser class below that gets called
# when that production occurs, and the index of the lhs in the
# nonterminals (as in # the gototable)
# nonterminals (as in the gototable)
#
_prodinfo = [
(1, 'unspecified', 0), # pspec: gspec (unspecified)
@ -128,7 +130,7 @@ _prodinfo = [
class GrammarParser(PyLR.Parser.LRParser):
class GrammarParser(PyLR.Parser.Parser):
"""
this class was produced automatically by the PyLR parser generator.
It is meant to be subclassed to produce a parser for the grammar

137
PyLR/Parsers/MathParser.py Normal file
View file

@ -0,0 +1,137 @@
"""
Parsers/MathParser.py -- created Sun Feb 27 14:24:41 2000
This file was automatically generated by the PyLR parser generator.
It defines the tables 'actiontable', 'gototable', and 'prodinfo'. These
tables are used to give functionality to a parsing engine. It also defines
a Parser class called MathParser which will use this engine. Its usage
is indicated in MathParser's doc-string.
"""
#
# this section contains source code added by the user
# plus 'import PyLR'
#
import PyLR
#
# the action table means
# ('s', <n>) shift and go to state n
# ('r', <n>) reduce with production n
# ('a', -1) accept
# ('', -1) error
# each row represents a state and each column a terminal lookahead symbol
# (including Grammar.EPS, which is -1).
# Lexer symbols are:
# ['EOF', 'INT', 'PLUS', 'TIMES', 'LPAR', 'RPAR']
#
_actiontable = [
[('', -1), ('s', 7), ('', -1), ('', -1), ('s', 8), ('', -1), ('', -1)],
[('a', -1), ('', -1), ('s', 9), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('r', 2), ('', -1), ('r', 2), ('s', 11), ('', -1), ('r', 2), ('r', 2)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('r', 4), ('', -1), ('r', 4), ('r', 4), ('', -1), ('r', 4), ('r', 4)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('r', 6), ('', -1), ('r', 6), ('r', 6), ('', -1), ('r', 6), ('r', 6)],
[('', -1), ('s', 7), ('', -1), ('', -1), ('s', 8), ('', -1), ('', -1)],
[('', -1), ('s', 7), ('', -1), ('', -1), ('s', 8), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('s', 7), ('', -1), ('', -1), ('s', 8), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('s', 9), ('', -1), ('', -1), ('s', 23), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('r', 1), ('', -1), ('r', 1), ('s', 11), ('', -1), ('r', 1), ('r', 1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('r', 3), ('', -1), ('r', 3), ('r', 3), ('', -1), ('r', 3), ('r', 3)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)],
[('r', 5), ('', -1), ('r', 5), ('r', 5), ('', -1), ('r', 5), ('r', 5)],
[('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1), ('', -1)]
]
#
# the goto table, each row represents a state
# and each column the nonterminal that was on the lhs of the
# reduction
#
_gototable = [
[1, 1, 3, 3, 5, 5],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[13, 13, 3, 3, 5, 5],
[None, None, 15, 15, 5, 5],
[None, None, 15, 15, 5, 5],
[None, None, None, None, 19, 19],
[None, None, None, None, 19, 19],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None],
[None, None, None, None, None, None]
]
#
# This is the prodinfo table. each row represents a production
# the entries are the length of the production, the name of a method
# in an instance of the MathParser class below that gets called
# when that production occurs, and the index of the lhs in the
# nonterminals (as in the gototable)
#
_prodinfo = [
(3, 'addfunc', 0), # expression: expression 2 term (addfunc)
(1, 'unspecified', 0), # expression: term (unspecified)
(3, 'timesfunc', 2), # term: term 3 factor (timesfunc)
(1, 'unspecified', 2), # term: factor (unspecified)
(3, 'parenfunc', 4), # factor: 4 expression 5 (parenfunc)
(1, 'unspecified', 4), # factor: 1 (unspecified)
]
class MathParser(PyLR.Parser.Parser):
    """
    this class was produced automatically by the PyLR parser generator.
    It is meant to be subclassed to produce a parser for the grammar

    expression: expression PLUS term (addfunc);
    expression: term (unspecified);
    term: term TIMES factor (timesfunc);
    term: factor (unspecified);
    factor: LPAR expression RPAR (parenfunc);
    factor: INT (unspecified);

    While parsing input, if one of the above productions is recognized,
    a method of your sub-class (whose name is indicated in parens to the
    right) will be invoked. Names marked 'unspecified' should be ignored.

    usage:

    class MyMathParser(MathParser):
        # ...define the methods for the productions...

    p = MyMathParser(); p.parse(text)
    """

    def __init__(self):
        # Hand a fresh MathLex lexer and this module's generated tables
        # to the generic parser.
        PyLR.Parser.Parser.__init__(self, PyLR.Lexers.MathLex(),
                                    _actiontable, _gototable, _prodinfo)

View file

@ -4,4 +4,5 @@ work, import the name here
"""
from GrammarParser import GrammarParser
from MathParser import MathParser

View file

@ -15,19 +15,14 @@
#include "Python.h"
#include "PyLRengine.h"
/***********************************************************************
* PyLRengine Error things
***********************************************************************/
static PyObject* PyLRParseError;
#define CHECK_MALLOC(obj) \
if (!(obj = (PyObject *) malloc (sizeof(PyObject)))) { \
PyErr_SetString(PyExc_MemoryError, "no more memory"); \
return NULL; \
}
static PyObject *PyLRengineError;
#define onError(message) \
{ PyErr_SetString(PyExc_ParseError, message); return NULL; }
{ PyErr_SetString(PyLRengineError, message); return NULL; }
@ -662,45 +657,43 @@ static PyTypeObject ParserType = {
static PyObject *
parsernew(self, args)
PyObject* self;
PyObject* args;
PyObject * self;
PyObject * args;
{
PyObject* pyprodlengths = NULL;
PyObject* pyactions = NULL;
PyObject* pygotos = NULL;
PyObject* res = NULL;
int bufchunksize=50;
int stackchunksize=100;
CHECK_MALLOC(pyprodlengths)
CHECK_MALLOC(pyactions)
CHECK_MALLOC(pygotos)
if (!PyArg_ParseTuple(args, "O!O!O!|ii", &PyList_Type, &pyprodlengths,
&PyList_Type, &pyactions, &PyList_Type, &pygotos,
&bufchunksize, &stackchunksize))
goto finally;
res = (PyObject*) newparserobject(pyprodlengths, pyactions, pygotos, bufchunksize, stackchunksize);
finally:
Py_XDECREF(pyprodlengths);
Py_XDECREF(pyactions);
Py_XDECREF(pygotos);
return res;
PyObject * pyprodlengths;
PyObject * pyactions;
PyObject * pygotos;
int bufchunksize=50;
int stackchunksize=100;
if ((pyprodlengths = (PyObject *) malloc (sizeof(PyObject))) == NULL)
onError("No More Mem!");
if ((pyactions = (PyObject *) malloc (sizeof(PyObject))) == NULL)
onError("No More Mem!");
if ((pygotos = (PyObject *) malloc (sizeof(PyObject))) == NULL)
onError("No More Mem!");
if (!PyArg_ParseTuple(args, "O!O!O!|ii", &PyList_Type, &pyprodlengths,
&PyList_Type, &pyactions, &PyList_Type, &pygotos,&bufchunksize, &stackchunksize))
return NULL;
return (PyObject *) newparserobject(pyprodlengths, pyactions, pygotos, bufchunksize, stackchunksize);
}
static struct PyMethodDef PyLRengine_methods[] = {
{"NewEngine", (PyCFunction)parsernew},
{NULL, NULL}
{"NewEngine", parsernew, 1},
{NULL, NULL}
};
void
initPyLRengine()
{
PyObject *m, *d;
m = Py_InitModule("PyLRengine", PyLRengine_methods);
d = PyModule_GetDict(m);
if (PyErr_Occurred())
Py_FatalError("can't initialize module PyLRengine");
PyObject *m, *d;
m = Py_InitModule("PyLRengine", PyLRengine_methods);
d = PyModule_GetDict(m);
PyLRengineError = Py_BuildValue("s", "PyLRengine.error");
PyDict_SetItemString(d, "error", PyLRengineError);
if (PyErr_Occurred())
Py_FatalError("can't initialize module PyLRengine");
}

View file

@ -27,7 +27,7 @@ import PyLR
# ('a', -1) accept
# ('', -1) error
# each row represents a state and each column a terminal lookahead symbol
# (excluding symbols with Lexer.SKIPTOK of course).
# (including Grammar.EPS, which is -1).
# Lexer symbols are:
# %(symbols)s
#