diff --git a/hy/cmdline.py b/hy/cmdline.py
index 48b5d8c..aab7384 100644
--- a/hy/cmdline.py
+++ b/hy/cmdline.py
@@ -31,18 +31,16 @@ import sys
import hy
-from hy.importer import ast_compile, import_buffer_to_module
-from hy.lex.states import Idle, LexException
-from hy.lex.machine import Machine
+from hy.lex import LexException, PrematureEndOfInput, tokenize
from hy.compiler import hy_compile
+from hy.importer import ast_compile, import_buffer_to_module
from hy.completer import completion
-from hy.macros import macro, require, process
+from hy.macros import macro, require
from hy.models.expression import HyExpression
from hy.models.string import HyString
from hy.models.symbol import HySymbol
-_machine = Machine(Idle, 1, 0)
try:
import __builtin__ as builtins
@@ -72,27 +70,14 @@ builtins.exit = HyQuitter('exit')
class HyREPL(code.InteractiveConsole):
def runsource(self, source, filename='', symbol='single'):
- global _machine
-
try:
- _machine.process(source + "\n")
+ tokens = tokenize(source)
+ except PrematureEndOfInput:
+ return True
except LexException:
- _machine = Machine(Idle, 1, 0)
self.showsyntaxerror(filename)
return False
- if type(_machine.state) != Idle:
- _machine = Machine(Idle, 1, 0)
- return True
-
- try:
- tokens = process(_machine.nodes, "__console__")
- except Exception:
- _machine = Machine(Idle, 1, 0)
- self.showtraceback()
- return False
-
- _machine = Machine(Idle, 1, 0)
try:
_ast = hy_compile(tokens, "__console__", root=ast.Interactive)
code = ast_compile(_ast, filename, symbol)
diff --git a/hy/lex/__init__.py b/hy/lex/__init__.py
index 8cf257a..651f89a 100644
--- a/hy/lex/__init__.py
+++ b/hy/lex/__init__.py
@@ -18,16 +18,21 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-from hy.lex.machine import Machine
-from hy.lex.states import Idle, LexException
+from rply.errors import LexingError
+
+from hy.lex.exceptions import LexException, PrematureEndOfInput # NOQA
+from hy.lex.lexer import lexer
+from hy.lex.parser import parser
def tokenize(buf):
"""
Tokenize a Lisp file or string buffer into internal Hy objects.
"""
- machine = Machine(Idle, 1, 0)
- machine.process(buf)
- if type(machine.state) != Idle:
- raise LexException("Incomplete Lex.")
- return machine.nodes
+ try:
+ return parser.parse(lexer.lex(buf))
+ except LexingError as e:  # convert rply's lexing error into Hy's LexException
+ pos = e.getsourcepos()  # rply-reported position, used for the line/column below
+ raise LexException(
+ "Could not identify the next token at line %s, column %s" % (
+ pos.lineno, pos.colno))
diff --git a/hy/lex/exceptions.py b/hy/lex/exceptions.py
new file mode 100644
index 0000000..21b2700
--- /dev/null
+++ b/hy/lex/exceptions.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2013 Nicolas Dandrimont
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+from hy.errors import HyError
+
+
+class LexException(HyError):
+ """Error raised while lexing Hy source (parent of PrematureEndOfInput)."""
+ pass
+
+
+class PrematureEndOfInput(LexException):
+ """Raised when the input ends before an open form has been closed."""
+ pass
diff --git a/hy/lex/machine.py b/hy/lex/machine.py
deleted file mode 100644
index f7e92d0..0000000
--- a/hy/lex/machine.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright (c) 2013 Paul Tagliamonte
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-from hy.lex.states import Idle, LexException
-
-
-class Machine(object):
- """
- Hy State Machine. This controls all the state hopping we need to do
- to properly parse Hy source.
- """
-
- __slots__ = ("submachine", "nodes", "state", "line", "column",
- "start_line", "start_column")
-
- def __init__(self, state, line, column):
- self.nodes = []
- self.line = line
- self.column = column
- self.submachine = None
- self.state = None
- self.set_state(state)
-
- def set_state(self, state):
- """
- Set the new internal machine state. This helps keep line annotations
- correct, and make sure that we properly call enter and exit.
- """
-
- if self.state:
- self.state._exit()
-
- self.accept_result(self.state)
-
- self.state = state(self)
- self.state._enter()
-
- self.start_line = self.line
- self.start_column = self.column
-
- def sub(self, state):
- """
- Set up a submachine for this machine.
- """
- self.submachine = Machine(state, self.line, self.column)
-
- def accept_result(self, state):
- """
- Accept and annotate the result.
- """
- if state and not state.result is None:
- result = state.result
-
- result.start_line, result.end_line = (self.start_line, self.line)
- result.start_column, result.end_column = (self.start_column,
- self.column)
- self.nodes.append(result)
-
- def process(self, buf):
- """
- process an iterable of chars into Hy internal models of the Source.
- """
- for char in buf:
-
- self.column += 1
- if char == "\n":
- self.line += 1
- self.column = 0
-
- if self.submachine:
- self.submachine.process([char])
- if type(self.submachine.state) == Idle:
- if len(self.submachine.nodes) > 1:
- raise LexException("Funky Submachine stuff")
-
- nodes = self.submachine.nodes
- self.submachine = None
- if nodes != []:
- self.state.nodes.append(nodes[0])
- continue
-
- new = self.state.process(char)
- if new:
- self.set_state(new)
diff --git a/hy/lex/states.py b/hy/lex/states.py
deleted file mode 100644
index 9c81967..0000000
--- a/hy/lex/states.py
+++ /dev/null
@@ -1,396 +0,0 @@
-# Copyright (c) 2013 Paul Tagliamonte
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-from hy.models.expression import HyExpression
-from hy.models.integer import HyInteger
-from hy.models.lambdalist import HyLambdaListKeyword
-from hy.models.float import HyFloat
-from hy.models.complex import HyComplex
-from hy.models.symbol import HySymbol
-from hy.models.string import HyString
-from hy.models.keyword import HyKeyword
-from hy.models.dict import HyDict
-from hy.models.list import HyList
-
-from hy.errors import HyError
-
-from abc import ABCMeta, abstractmethod
-
-
-WHITESPACE = [" ", "\t", "\n", "\r"]
-
-
-class LexException(HyError):
- """
- Error during the Lexing of a Hython expression.
- """
- pass
-
-
-def _resolve_atom(obj):
- """
- Resolve a bare atom into one of the following (in order):
-
- - Integer
- - LambdaListKeyword
- - Float
- - Complex
- - Symbol
- """
- try:
- return HyInteger(obj)
- except ValueError:
- pass
-
- if obj.startswith("&"):
- return HyLambdaListKeyword(obj)
-
- try:
- return HyFloat(obj)
- except ValueError:
- pass
-
- if obj != "j":
- try:
- return HyComplex(obj)
- except ValueError:
- pass
-
- table = {
- "true": "True",
- "false": "False",
- "null": "None",
- }
-
- if obj in table:
- return HySymbol(table[obj])
-
- if obj.startswith(":"):
- return HyKeyword(obj)
-
- if obj.startswith("*") and obj.endswith("*") and obj not in ("*", "**"):
- obj = obj[1:-1].upper()
-
- if "-" in obj and obj != "-":
- obj = obj.replace("-", "_")
-
- return HySymbol(obj)
-
-
-class State(object):
- """
- Generic State model.
- """
-
- __slots__ = ("nodes", "machine")
- __metaclass__ = ABCMeta
-
- def __init__(self, machine):
- self.machine = machine
-
- def _enter(self):
- """ Internal shim for running global ``enter`` code """
- self.result = None
- self.nodes = []
- self.enter()
-
- def _exit(self):
- """ Internal shim for running global ``exit`` code """
- self.exit()
-
- def enter(self):
- """
- Overridable ``enter`` routines. Subclasses may implement this.
- """
- pass
-
- def exit(self):
- """
- Overridable ``exit`` routines. Subclasses may implement this.
- """
- pass
-
- @abstractmethod
- def process(self, char):
- """
- Overridable ``process`` routines. Subclasses must implement this to be
- useful.
- """
- pass # ABC
-
-
-class ListeyThing(State):
-
- def enter(self):
- self.buf = ""
-
- def commit(self):
- if self.buf != "":
- ret = _resolve_atom(self.buf)
- ret.start_line = self._start_line
- ret.start_column = self._start_column
- ret.end_line = self.machine.line
- ret.end_column = (self.machine.column - 1)
-
- self.nodes.append(ret)
- self.buf = ""
-
- def exit(self):
- self.commit()
- self.result = self.result_type(self.nodes)
-
- def process(self, char):
- if char == "(":
- self.commit()
- self.machine.sub(Expression)
- return
-
- if char == "{":
- self.commit()
- self.machine.sub(Dict)
- return
-
- if char == "[":
- self.commit()
- self.machine.sub(List)
- return
-
- if char == "\"":
- self.commit()
- self.machine.sub(String)
- return
-
- if char == ";":
- self.commit()
- self.machine.sub(Comment)
- return
-
- if char == self.end_char:
- return Idle
-
- if char in ")]}":
- raise LexException("Unexpected closing character: `%s'" % (char))
-
- if char in WHITESPACE:
- self.commit()
- return
-
- if self.buf == "":
- self._start_line = self.machine.line
- self._start_column = self.machine.column
-
- self.buf += char
-
-
-class List(ListeyThing):
- """
- This state parses a Hy list (like a Clojure vector) for use in native
- Python interop.
-
- [foo 1 2 3 4] is a good example.
- """
-
- result_type = HyList
- end_char = "]"
-
-
-class Expression(ListeyThing):
- """
- This state parses a Hy expression (statement, to be evaluated at runtime)
- for running things & stuff.
- """
-
- result_type = HyExpression
- end_char = ")"
-
-
-class Dict(ListeyThing):
- """
- This state parses a Hy dict for things.
- """
-
- def exit(self):
- self.commit()
- self.result = HyDict(self.nodes)
-
- end_char = "}"
-
-
-class String(State):
- """
- String state. This will handle stuff like:
-
- (println "foobar")
- ^^^^^^^^ -- String
- """
-
- def enter(self):
- self.escaped = False
-
- def exit(self):
- self.result = HyString("".join(self.nodes))
-
- def process(self, char):
- """
- State transitions:
-
- - " - Idle
- """
- if self.escaped:
- self.escaped = False
- simple_escapables = tuple('abfnrtv')
- if char in simple_escapables:
- self.nodes.append(eval('"\\'+char+'"'))
- return
- if char == "\\":
- self.nodes.append("\\")
- return
- if char == "\"":
- self.nodes.append("\"")
- return
-
- raise LexException("Unknown modifier: `%s'" % (char))
-
- if char == "\"":
- return Idle
-
- if char == "\\":
- self.escaped = True
- return
-
- self.nodes.append(char)
-
-
-class Atom(State):
- """
- This state parses integer constants, boolean constants, and symbols
- """
-
- def __init__(self, machine):
- State.__init__(self, machine)
- self.initial_buf = ''
-
- def enter(self):
- self.buf = self.initial_buf
-
- def exit(self):
- self.result = _resolve_atom(self.buf)
-
- def process(self, char):
- """
- State transitions:
-
- - WHITESPACE - Idle
- - ; - Comment
- """
-
- if char in WHITESPACE:
- return Idle
-
- if char == ";":
- return Comment
-
- self.buf += char
-
-
-def AtomStartingWith(initial_char):
- def AtomFactory(machine):
- state = Atom(machine)
- state.initial_buf = initial_char
- return state
- return AtomFactory
-
-
-class Idle(State):
- """
- Idle state. This is the first (and last) thing that we should
- be in.
- """
-
- def process(self, char):
- """
- State transitions:
-
- - ( - Expression
- - [ - List
- - { - Dict
- - \" - String
- - ; - Comment
- - # - Hash
- - (default) - Atom
- """
-
- if char == "(":
- return Expression
-
- if char == "[":
- return List
-
- if char == "{":
- return Dict
-
- if char == "\"":
- return String
-
- if char == ";":
- return Comment
-
- if char == "#":
- return Hash
-
- if char in WHITESPACE:
- return
-
- return AtomStartingWith(char)
-
-
-class Comment(State):
- """
- Comment state.
- """
-
- def process(self, char):
- """
- State transitions:
-
- - \n - Idle
- - (default) - disregard.
- """
-
- if char == "\n":
- return Idle
-
-
-class Hash(State):
- """
- Hash state
- """
-
- def process(self, char):
- """
- State transitions:
-
- - ! - Comment
- """
-
- if char == "!":
- return Comment
-
- raise LexException("Unknown char (Hash state): `%s'" % (char))
diff --git a/tests/lex/test_lex.py b/tests/lex/test_lex.py
index 7b90750..359999b 100644
--- a/tests/lex/test_lex.py
+++ b/tests/lex/test_lex.py
@@ -27,9 +27,7 @@ from hy.models.symbol import HySymbol
from hy.models.string import HyString
from hy.models.dict import HyDict
-from hy.lex.states import LexException
-
-from hy.lex import tokenize
+from hy.lex import LexException, PrematureEndOfInput, tokenize
def test_lex_exception():
@@ -37,13 +35,17 @@ def test_lex_exception():
try:
tokenize("(foo")
assert True is False
- except LexException:
+ except PrematureEndOfInput:
pass
-
try:
- tokenize("&foo&")
+ tokenize("{foo bar")
assert True is False
- except LexException:
+ except PrematureEndOfInput:
+ pass
+ try:
+ tokenize("(defn foo [bar]")
+ assert True is False
+ except PrematureEndOfInput:
pass
@@ -124,6 +126,8 @@ def test_lex_expression_complex():
assert objs == [HyExpression([HySymbol("foo"), HyComplex(-0.5j)])]
objs = tokenize("(foo 1.e7j)")
assert objs == [HyExpression([HySymbol("foo"), HyComplex(1.e7j)])]
+ objs = tokenize("(foo j)")
+ assert objs == [HyExpression([HySymbol("foo"), HySymbol("j")])]
def test_lex_line_counting():
@@ -222,11 +226,17 @@ def test_escapes():
entry = tokenize("(foo \"foo\\n\")")[0]
assert entry[1] == "foo\n"
- try:
- entry = tokenize("(foo \"foo\s\")")[0]
- assert True is False
- except LexException:
- pass
+ entry = tokenize("(foo \"foo\\s\")")[0]  # backslash doubled: "\s" is not a valid Python escape
+ assert entry[1] == "foo\\s"
+
+
+def test_unicode_escapes():
+ """Check that hex and unicode escapes each decode to one character"""
+ source = r'"a\xac\u1234\u20ac\U00008000"'
+ assert len(source) == 29
+ parsed = tokenize(source)[0]
+ assert len(parsed) == 5
+ assert list(map(ord, parsed)) == [0x61, 0xac, 0x1234, 0x20ac, 0x8000]
def test_hashbang():
diff --git a/tests/test_bin.py b/tests/test_bin.py
index 3a4849c..f62957e 100644
--- a/tests/test_bin.py
+++ b/tests/test_bin.py
@@ -63,7 +63,7 @@ def test_bin_hy_cmd():
ret = run_cmd("hy -c \"(koan\"")
assert ret[0] == 1
- assert "LexException" in ret[1]
+ assert "PrematureEndOfInput" in ret[1]
def test_bin_hy_icmd():