codepad
[
create a new paste
]
login
|
about
Language:
C
C++
D
Haskell
Lua
OCaml
PHP
Perl
Plain Text
Python
Ruby
Scheme
Tcl
# simplified monadic packrat (almost CFG) parser # derived from parse6.py, added (lazy) backtracking # # parser :: str -> [ (value, str) ] class lcons (object): def __init__ (self, iter): self.iter = iter; self.value = None def force (self): if self.iter: try: self.value = (self.iter.next(), lcons(self.iter)) except StopIteration: self.value = None self.iter = None return self.value def empty(self): return self.force() == None def head (self): return self.force()[0] def tail (self): return self.force()[1] def __iter__ (self): while not self.empty(): yield self.head() self = self.tail() def memoize(fn, _memo=dict()): def replc (s): k = (fn, s) if k not in _memo: _memo[k] = lcons(iter(fn(s))) return _memo[k] return replc def ret (value): return lambda s: [(value, s)] def bind (p, f, *fs): @memoize def parse (s): for res1 in p(s): for res2 in f(res1[0])(res1[1]): yield res2 if fs: return bind(parse, *fs) return parse never = lambda s: [] def alt (*ps): @memoize def parse (s): for p in ps: for res in p(s): yield res return parse empty = ret(None) def then (p, *ps): more = then(*ps) if ps else None return bind(p, lambda x: bind(more, lambda rest: ret([x]+rest)) if ps else ret([x])) def repeat (p, res=[]): return alt( bind(p, lambda v: repeat(p, res+[v])), ret(res) ) def repeat1 (p): return bind(p, lambda v: repeat(p, [v])) def pred (f): def parse (s): if s and f(s[0]): return [(s[0], s[1:])] else: return [] return parse def char (ch): return pred(lambda c: c==ch) def literal (s): return then(*(char(c) for c in s)) def delay (fn): return bind(empty, lambda _:fn()) def option (p): return alt(p, empty) def repn (p, n): return then(*([p]*n)) end = lambda s: [] if s else [(None, s)] # try out some parsing def take (iterable, n): if not n: return for p in iterable: yield p n -= 1 if not n: return def tryit (lang, inp): print "parsing", repr(inp), "with", lang print " ==>", list(take(then(eval(lang), end)(inp), 1)) parens = alt( then( char('('), delay(lambda:parens), char(')') ), empty ) tryit('parens', '()') tryit('parens', '((()))') tryit('parens', '(()') term = alt( repeat1(pred(str.isdigit)), then(char('('), delay(lambda:expr), char(')'))) arith = then(term, repeat(then(alt(char('*'), char('/')), delay(lambda:term)))) expr = then(arith, repeat(then(alt(char('+'), char('-')), delay(lambda:arith)))) tryit('expr', '1') tryit('expr', '1+2') tryit('expr', '1+2*3+(4+5)*6') # now for a brutally ambiguous grammar ambi = lambda n: then(repn(option(char('a')), n), repn(char('a'), n)) tryit('ambi(6)', 'a'*6) tryit('ambi(8)', 'a'*8) tryit('ambi(10)', 'a'*10) name = repeat1(pred(str.isalpha)) opentag = bind(char('<'), lambda _: name, lambda n: bind(char('>'), lambda _: ret(''.join(n)))) def closetag(n): return then(literal('</'), literal(n), char('>')) xmls = repeat(delay(lambda:xml)) xml = bind(opentag, lambda n: then(xmls, closetag(n))) tryit('xml', '<a></a>') tryit('xml', '<a><b><c></c><d></d></b></a>')
Private
[
?
]
Run code
Submit