From 68cf93e6d0bb38a33cc734f9944b26f166e57553 Mon Sep 17 00:00:00 2001
From: Paul Tagliamonte
Date: Sat, 15 Dec 2012 17:16:58 -0500
Subject: [PATCH] adding in some silly lexing voodoo

---
 hy/lex/states.py   | 37 ++++++++++++++++++++++++++++++++++++-
 hy/lex/tokenize.py |  3 +++
 test.py            |  4 ++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/hy/lex/states.py b/hy/lex/states.py
index fe29aa7..b7cd24c 100644
--- a/hy/lex/states.py
+++ b/hy/lex/states.py
@@ -1,4 +1,5 @@
 from hy.lang.expression import HYExpression
+from hy.lex.errors import LexException
 from hy.lex.machine import Machine
 
 
@@ -69,10 +70,44 @@ class Expression(State):
             return
 
         if x == "\"":
-            return String
+            self.sub_machine = Machine(String)
+            return
 
         if x == "(":
             self.sub_machine = Machine(Expression)
             return
 
         self.bulk += x
+
+
+class String(State):
+    magic = {
+        "n": "\n",
+        "t": "\t",
+        "\\": "\\",
+        "\"": "\""
+    }
+
+    def enter(self):
+        self.buf = ""
+        self.esc = False
+
+    def exit(self):
+        self.machine.nodes.append(self.buf)
+
+    def p(self, x):
+        if x == "\\":
+            self.esc = True
+            return
+
+        if x == "\"" and not self.esc:
+            return Idle
+
+        if self.esc and x not in self.magic:
+            raise LexException("Unknown escape: \\%s" % (x))
+        elif self.esc:
+            x = self.magic[x]
+
+        self.esc = False
+
+        self.buf += x
diff --git a/hy/lex/tokenize.py b/hy/lex/tokenize.py
index 17e99ab..53a02ac 100644
--- a/hy/lex/tokenize.py
+++ b/hy/lex/tokenize.py
@@ -1,7 +1,10 @@
 from hy.lex.machine import Machine
 from hy.lex.states import Idle
+from hy.lex.errors import LexException
 
 def tokenize(buff):
     m = Machine(Idle)
     m.process(buff)
+    if type(m.state) != Idle:
+        raise LexException("End of file.")
     return m.nodes
diff --git a/test.py b/test.py
index f75a744..0a6dc50 100644
--- a/test.py
+++ b/test.py
@@ -4,3 +4,7 @@ from hy.lex.tokenize import tokenize
 print tokenize("""
 (+ 2 (+ 1 1) (- 1 1))
 """)
+
+print tokenize("""
+(print "Hello, \\n World")
+""")