[ create a new paste ] login | about

Link: http://codepad.org/pmpqp1wI    [ raw code | output | fork ]

aaronla - Python, pasted on Mar 2:
# simplified monadic packrat (almost CFG) parser
#  derived from parse6.py, added (lazy) backtracking   
#
#  parser :: str -> [ (value, str) ]
class lcons (object):
    def __init__ (self, iter): self.iter = iter; self.value = None
    def force (self):
        if self.iter:
            try:
                self.value = (self.iter.next(), lcons(self.iter))
            except StopIteration:
                self.value = None
            self.iter = None
        return self.value
    def empty(self): return self.force() == None
    def head (self): return self.force()[0]
    def tail (self): return self.force()[1]
    def __iter__ (self):
        while not self.empty():
            yield self.head()
            self = self.tail()

def memoize(fn, _memo=dict()):
    def replc (s):
        k = (fn, s)
        if k not in _memo:
            _memo[k] = lcons(iter(fn(s)))
        return _memo[k]
    return replc

def ret (value): 
    return lambda s: [(value, s)]

def bind (p, f, *fs):
    @memoize
    def parse (s):
        for res1 in p(s):
            for res2 in f(res1[0])(res1[1]):
                yield res2
    if fs: return bind(parse, *fs)
    return parse

never = lambda s: []

def alt (*ps):
    @memoize
    def parse (s):
        for p in ps:
            for res in p(s):
                yield res
    return parse

empty = ret(None)

def then (p, *ps):
    more = then(*ps) if ps else None
    return bind(p, lambda x: bind(more, lambda rest: ret([x]+rest)) if ps else ret([x]))

def repeat (p, res=[]):
    return alt( bind(p, lambda v: repeat(p, res+[v])),
                ret(res) )
def repeat1 (p):
    return bind(p, lambda v: repeat(p, [v]))

def pred (f):
    def parse (s):
        if s and f(s[0]): return [(s[0], s[1:])]
        else: return []
    return parse

def char (ch):
    return pred(lambda c: c==ch)

def literal (s):
    return then(*(char(c) for c in s))

def delay (fn):
    return bind(empty, lambda _:fn())

def option (p):
    return alt(p, empty)

def repn (p, n):
    return then(*([p]*n))

end = lambda s: [] if s else [(None, s)]

# try out some parsing
def take (iterable, n):
    if not n: return
    for p in iterable:
        yield p
        n -= 1
        if not n: return

def tryit (lang, inp):
    print "parsing", repr(inp), "with", lang
    print " ==>", list(take(then(eval(lang), end)(inp), 1))

parens = alt(
    then( char('('), delay(lambda:parens), char(')') ),
    empty
    )

tryit('parens', '()')
tryit('parens', '((()))')
tryit('parens', '(()')

term = alt( repeat1(pred(str.isdigit)),
            then(char('('), 
                delay(lambda:expr), 
                char(')')))
arith = then(term, repeat(then(alt(char('*'), char('/')), 
                               delay(lambda:term))))
expr = then(arith, repeat(then(alt(char('+'), char('-')), 
                               delay(lambda:arith))))
 
tryit('expr', '1')
tryit('expr', '1+2')
tryit('expr', '1+2*3+(4+5)*6')

# now for a brutally ambiguous grammar
ambi = lambda n: then(repn(option(char('a')), n),
                      repn(char('a'), n))
tryit('ambi(6)', 'a'*6)
tryit('ambi(8)', 'a'*8)
tryit('ambi(10)', 'a'*10)
        
name = repeat1(pred(str.isalpha))
opentag = bind(char('<'), 
           lambda _: name,
           lambda n: bind(char('>'), lambda _: ret(''.join(n))))
def closetag(n):
    return then(literal('</'), literal(n), char('>'))
xmls = repeat(delay(lambda:xml))
xml = bind(opentag,
           lambda n: then(xmls, closetag(n)))

tryit('xml', '<a></a>')
tryit('xml', '<a><b><c></c><d></d></b></a>')


Output:
parsing '()' with parens
 ==> [([['(', None, ')'], None], '')]
parsing '((()))' with parens
 ==> [([['(', ['(', ['(', None, ')'], ')'], ')'], None], '')]
parsing '(()' with parens
 ==> []
parsing '1' with expr
 ==> [([[[['1'], []], []], None], '')]
parsing '1+2' with expr
 ==> [([[[['1'], []], [['+', [['2'], []]]]], None], '')]
parsing '1+2*3+(4+5)*6' with expr
 ==> [([[[['1'], []], [['+', [['2'], [['*', ['3']]]]], ['+', [['(', [[['4'], []], [['+', [['5'], []]]]], ')'], [['*', ['6']]]]]]], None], '')]
parsing 'aaaaaa' with ambi(6)
 ==> [([[[None, None, None, None, None, None], ['a', 'a', 'a', 'a', 'a', 'a']], None], '')]
parsing 'aaaaaaaa' with ambi(8)
 ==> [([[[None, None, None, None, None, None, None, None], ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']], None], '')]
parsing 'aaaaaaaaaa' with ambi(10)
 ==> [([[[None, None, None, None, None, None, None, None, None, None], ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']], None], '')]
parsing '<a></a>' with xml
 ==> [([[[], [['<', '/'], ['a'], '>']], None], '')]
parsing '<a><b><c></c><d></d></b></a>' with xml
 ==> [([[[[[[[], [['<', '/'], ['c'], '>']], [[], [['<', '/'], ['d'], '>']]], [['<', '/'], ['b'], '>']]], [['<', '/'], ['a'], '>']], None], '')]


Create a new paste based on this one


Comments: