[ create a new paste ] login | about

Link: http://codepad.org/P5l2l6dm    [ raw code | output | fork ]

aaronla - Python, pasted on Mar 2:
# simplified monadic (PEG) parser. no memoization, some backtracking.
#  parser :: str -> maybe (value, str)
def ret (value): 
    return lambda s: (value, s)

def bind (p, *fs):
    def parse (s):
        res = p(s)
        for f in fs:
            res = res and f(res[0])(res[1])
        return res
    return parse

never = lambda s: False

def alt (*ps):
    def parse (s):
        for p in ps:
            res = p(s)
            if res: return res
        return False
    return parse

empty = ret(None)

def then (p, *ps):
    return bind(p, lambda x: bind(then(*ps), lambda rest: ret([x]+rest)) if ps else ret([x]))

def repeat (p, res=[]):
    return alt( bind(p, lambda v: repeat(p, res+[v])),
                ret(res) )
def repeat1 (p):
    return bind(p, lambda v: repeat(p, [v]))

def pred (f):
    def parse (s):
        if s and f(s[0]): return (s[0], s[1:])
        else: return False
    return parse

def char (ch):
    return pred(lambda c: c==ch)

def literal (s):
    return then(*(char(c) for c in s))

def delay (fn):
    return bind(empty, lambda _:fn())

def option (p):
    return alt(p, empty)

def repn (p, n):
    return then(*([p]*n))

end = lambda s: False if s else (None, s)

# try out some parsing
def tryit (lang, inp):
    print "parsing", repr(inp), "with", lang
    print " ==>", then(eval(lang), end)(inp)

parens = alt(
    then( char('('), delay(lambda:parens), char(')') ),
    empty
    )

tryit('parens', '()')
tryit('parens', '((()))')
tryit('parens', '(()')

term = alt( repeat1(pred(str.isdigit)),
            then(char('('), 
                delay(lambda:expr), 
                char(')')))
arith = then(term, repeat(then(alt(char('*'), char('/')), 
                               delay(lambda:term))))
expr = then(arith, repeat(then(alt(char('+'), char('-')), 
                               delay(lambda:arith))))
 
tryit('expr', '1')
tryit('expr', '1+2')
tryit('expr', '1+2*3+(4+5)*6')

name = repeat1(pred(str.isalpha))
opentag = bind(char('<'), 
           lambda _: name,
           lambda n: bind(char('>'), lambda _: ret(''.join(n))))
def closetag(n):
    return then(literal('</'), literal(n), char('>'))
xmls = repeat(delay(lambda:xml))
xml = bind(opentag,
           lambda n: then(xmls, closetag(n)))

tryit('xml', '<a></a>')
tryit('xml', '<a><b><c></c><d></d></b></a>')

   


Output:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
parsing '()' with parens
 ==> ([['(', None, ')'], None], '')
parsing '((()))' with parens
 ==> ([['(', ['(', ['(', None, ')'], ')'], ')'], None], '')
parsing '(()' with parens
 ==> False
parsing '1' with expr
 ==> ([[[['1'], []], []], None], '')
parsing '1+2' with expr
 ==> ([[[['1'], []], [['+', [['2'], []]]]], None], '')
parsing '1+2*3+(4+5)*6' with expr
 ==> ([[[['1'], []], [['+', [['2'], [['*', ['3']]]]], ['+', [['(', [[['4'], []], [['+', [['5'], []]]]], ')'], [['*', ['6']]]]]]], None], '')
parsing '<a></a>' with xml
 ==> ([[[], [['<', '/'], ['a'], '>']], None], '')
parsing '<a><b><c></c><d></d></b></a>' with xml
 ==> ([[[[[[[], [['<', '/'], ['c'], '>']], [[], [['<', '/'], ['d'], '>']]], [['<', '/'], ['b'], '>']]], [['<', '/'], ['a'], '>']], None], '')


Create a new paste based on this one


Comments: