From f7552149db3c3b6ca4c2cd22fc0ec4e7062141ca Mon Sep 17 00:00:00 2001
From: Paul Tagliamonte
Date: Sat, 15 Dec 2012 16:26:03 -0500
Subject: [PATCH] Adding in a test lexer.

---
 hy/lex/__init__.py |  0
 hy/lex/tokenize.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++
 test.py            |  8 ++++
 3 files changed, 106 insertions(+)
 create mode 100644 hy/lex/__init__.py
 create mode 100644 hy/lex/tokenize.py
 create mode 100644 test.py

diff --git a/hy/lex/__init__.py b/hy/lex/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hy/lex/tokenize.py b/hy/lex/tokenize.py
new file mode 100644
index 0000000..6f50f82
--- /dev/null
+++ b/hy/lex/tokenize.py
@@ -0,0 +1,98 @@
+
+class HYExpression(list):
+    def __init__(self, nodes):
+        self += nodes
+
+
+class LexException(Exception):
+    pass
+
+
+class State(object):
+    def __init__(self, machine):
+        self.machine = machine
+
+    def enter(self): pass
+    def exit(self): pass
+    def process(self, x): pass
+
+
+class Comment(State):
+    def process(self, x):
+        if x == '\n':
+            return Idle
+
+
+class Idle(State):
+    def process(self, x):
+        if x == ";":
+            return Comment
+        if x == "(":
+            return Expression
+        if x in [" ", "\t", "\n", "\r"]:
+            return
+
+        raise LexException("Unknown char: %s" % (x))
+
+
+class Expression(State):
+    def enter(self):
+        self.nodes = HYExpression([])
+        self.bulk = ""
+        self.sub_machine = None
+
+    def exit(self):
+        if self.bulk:
+            self.nodes.append(self.bulk)
+
+        self.machine.nodes.append(self.nodes)
+
+    def commit(self):
+        if self.bulk.strip() != "":
+            self.nodes.append(self.bulk)
+        self.bulk = ""
+
+    def process(self, x):
+        if self.sub_machine is not None:
+            self.sub_machine.process(x)
+            if type(self.sub_machine.state) == Idle:
+                self.nodes.append(self.sub_machine.nodes)
+                self.sub_machine = None
+            return
+
+        if x == ")":
+            return Idle
+
+        if x == " ":
+            self.commit()
+            return
+
+        if x == "(":
+            self.sub_machine = Machine(Expression)
+            return
+
+        self.bulk += x
+
+
+class Machine(object):
+    def __init__(self, state):
+        # print "New machine: %s" % (state)
+        self.nodes = []
+        self.state = state(self)
+        self.state.enter()
+
+    def process(self, buf):
+        for i in range(0, len(buf)):
+            char = buf[i]
+            nx = self.state.process(char)
+            if nx:
+                # print "New state: %s" % (nx)
+                self.state.exit()
+                self.state = nx(self)
+                self.state.enter()
+
+
+def tokenize(buff):
+    m = Machine(Idle)
+    m.process(buff)
+    return m.nodes
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..9cdac1e
--- /dev/null
+++ b/test.py
@@ -0,0 +1,8 @@
+
+from hy.lex.tokenize import tokenize
+
+print tokenize("""
+(+ 1 1) ; this adds one plus one
+(- 1 1) ; this does other things
+(print (+ 1 1))
+""")
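
For a sense of what this lexer produces, here is a rough sketch of calling
tokenize() on the kind of input exercised by test.py above (assuming the
repository root is on the import path so hy.lex.tokenize resolves). Each
top-level s-expression comes back as a list of token strings; a nested
expression is appended as the inner machine's whole node list, so it arrives
wrapped in one extra level of nesting:

    from hy.lex.tokenize import tokenize

    # A flat form lexes to a single list of token strings.
    print tokenize("(+ 1 1)")
    # [['+', '1', '1']]

    # A nested form: Expression.process appends the sub-machine's node
    # list as-is, so the inner form carries an extra pair of brackets.
    print tokenize("(print (+ 1 1))")
    # [['print', [['+', '1', '1']]]]

    # Text after a ";" at the top level is consumed by the Comment state
    # and never shows up in the output.
    print tokenize("(- 1 1) ; ignored")
    # [['-', '1', '1']]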