aaronla
-
Python,
pasted
on Mar 2:
|
|
# simplified monadic (PEG) parser. no memoization, some backtracking.
# parser :: str -> maybe (value, str)
def ret (value):
return lambda s: (value, s)
def bind (p, *fs):
def parse (s):
res = p(s)
for f in fs:
res = res and f(res[0])(res[1])
return res
return parse
never = lambda s: False
def alt (*ps):
def parse (s):
for p in ps:
res = p(s)
if res: return res
return False
return parse
empty = ret(None)
def then (p, *ps):
return bind(p, lambda x: bind(then(*ps), lambda rest: ret([x]+rest)) if ps else ret([x]))
def repeat (p, res=[]):
return alt( bind(p, lambda v: repeat(p, res+[v])),
ret(res) )
def repeat1 (p):
return bind(p, lambda v: repeat(p, [v]))
def pred (f):
def parse (s):
if s and f(s[0]): return (s[0], s[1:])
else: return False
return parse
def char (ch):
return pred(lambda c: c==ch)
def literal (s):
return then(*(char(c) for c in s))
def delay (fn):
return bind(empty, lambda _:fn())
def option (p):
return alt(p, empty)
def repn (p, n):
return then(*([p]*n))
end = lambda s: False if s else (None, s)
# try out some parsing
def tryit (lang, inp):
print "parsing", repr(inp), "with", lang
print " ==>", then(eval(lang), end)(inp)
parens = alt(
then( char('('), delay(lambda:parens), char(')') ),
empty
)
tryit('parens', '()')
tryit('parens', '((()))')
tryit('parens', '(()')
term = alt( repeat1(pred(str.isdigit)),
then(char('('),
delay(lambda:expr),
char(')')))
arith = then(term, repeat(then(alt(char('*'), char('/')),
delay(lambda:term))))
expr = then(arith, repeat(then(alt(char('+'), char('-')),
delay(lambda:arith))))
tryit('expr', '1')
tryit('expr', '1+2')
tryit('expr', '1+2*3+(4+5)*6')
name = repeat1(pred(str.isalpha))
opentag = bind(char('<'),
lambda _: name,
lambda n: bind(char('>'), lambda _: ret(''.join(n))))
def closetag(n):
return then(literal('</'), literal(n), char('>'))
xmls = repeat(delay(lambda:xml))
xml = bind(opentag,
lambda n: then(xmls, closetag(n)))
tryit('xml', '<a></a>')
tryit('xml', '<a><b><c></c><d></d></b></a>')
|
Output:
|
|
parsing '()' with parens
==> ([['(', None, ')'], None], '')
parsing '((()))' with parens
==> ([['(', ['(', ['(', None, ')'], ')'], ')'], None], '')
parsing '(()' with parens
==> False
parsing '1' with expr
==> ([[[['1'], []], []], None], '')
parsing '1+2' with expr
==> ([[[['1'], []], [['+', [['2'], []]]]], None], '')
parsing '1+2*3+(4+5)*6' with expr
==> ([[[['1'], []], [['+', [['2'], [['*', ['3']]]]], ['+', [['(', [[['4'], []], [['+', [['5'], []]]]], ')'], [['*', ['6']]]]]]], None], '')
parsing '<a></a>' with xml
==> ([[[], [['<', '/'], ['a'], '>']], None], '')
parsing '<a><b><c></c><d></d></b></a>' with xml
==> ([[[[[[[], [['<', '/'], ['c'], '>']], [[], [['<', '/'], ['d'], '>']]], [['<', '/'], ['b'], '>']]], [['<', '/'], ['a'], '>']], None], '')
|
|