[ create a new paste ] login | about

Link: http://codepad.org/RZhkgHf7    [ raw code | fork ]

Python, pasted on Jan 5:
#!/usr/bin/env python

from HTMLParser import HTMLParser
import greenlet

class IterParser(HTMLParser):
    """HTMLParser that reports each parse event by switching greenlets.

    Every ``handle_*`` callback packs its arguments into a tuple whose
    first element names the event ('START', 'STARTEND', 'END', 'DATA',
    'CHARREF', 'ENTITYREF', 'COMMENT', 'DECL' or 'PI') and passes that
    tuple to ``self.parent.switch``.  ``self.parent`` is assigned by
    ``feed`` on every call.
    """

    def _emit(self, *token):
        # Hand the event tuple to the greenlet remembered by feed(); parsing
        # continues only once something switches back into this greenlet.
        self.parent.switch(token)

    def feed(self, data):
        """Record the current greenlet's parent, then parse *data*."""
        current = greenlet.getcurrent()
        self.parent = current.parent
        # Call the base class explicitly rather than through super().
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        """Report an opening tag as ('START', tag, attrs)."""
        self._emit('START', tag, attrs)

    def handle_startendtag(self, tag, attrs):
        """Report a self-closing tag as ('STARTEND', tag, attrs)."""
        self._emit('STARTEND', tag, attrs)

    def handle_endtag(self, tag):
        """Report a closing tag as ('END', tag)."""
        self._emit('END', tag)

    def handle_data(self, data):
        """Report text content as ('DATA', data)."""
        self._emit('DATA', data)

    def handle_charref(self, ref):
        """Report a character reference as ('CHARREF', ref)."""
        self._emit('CHARREF', ref)

    def handle_entityref(self, name):
        """Report an entity reference as ('ENTITYREF', name)."""
        self._emit('ENTITYREF', name)

    def handle_comment(self, data):
        """Report a comment as ('COMMENT', data)."""
        self._emit('COMMENT', data)

    def handle_decl(self, decl):
        """Report a declaration as ('DECL', decl)."""
        self._emit('DECL', decl)

    def handle_pi(self, data):
        """Report a processing instruction as ('PI', data)."""
        self._emit('PI', data)

def iterparse(stream):
    """Lazily yield parse events from the file-like object *stream*.

    The stream is read in 8192-character chunks.  Each chunk is fed to an
    ``IterParser`` inside a fresh greenlet; whenever one of the parser's
    handlers switches back with an event tuple (for example
    ``('START', tag, attrs)`` or ``('DATA', text)``), that tuple is yielded
    to the caller before parsing resumes.

    Args:
        stream: object with a ``read(size)`` method that returns an empty
            value at end of input.

    Yields:
        The event tuples produced by ``IterParser``'s handlers, in
        document order.

    NOTE(review): ``parser.close()` is never called, so markup that is
    still incomplete when the input ends (e.g. an unterminated tag) is
    presumably never reported -- confirm whether callers need that.
    """
    parser = IterParser()
    while True:
        chunk = stream.read(8192)
        if not chunk:
            # Empty read marks end of input (works for str and bytes alike).
            break
        # Pass the chunk through switch() rather than closing over the loop
        # variable, so the greenlet's argument is bound explicitly.
        coro = greenlet.greenlet(parser.feed)
        token = coro.switch(chunk)
        # When feed() returns, the greenlet dies and switch() hands back
        # feed()'s return value (None).  Check for death *before* yielding
        # so that this None is not emitted as a bogus token per chunk.
        while not coro.dead:
            yield token
            token = coro.switch()

import sys
if __name__ == '__main__':
    # Usage: script.py [FILE]
    # Prints one parse event per line, reading FILE if given, else stdin.
    if len(sys.argv) == 1:
        # stdin is owned by the interpreter; leave it open.
        for token in iterparse(sys.stdin):
            print(token)
    else:
        # Close the input file deterministically, even if parsing raises.
        with open(sys.argv[1], 'r') as stream:
            for token in iterparse(stream):
                print(token)


Create a new paste based on this one


Comments: