diff --git a/hy/cmdline.py b/hy/cmdline.py index 48b5d8c..aab7384 100644 --- a/hy/cmdline.py +++ b/hy/cmdline.py @@ -31,18 +31,16 @@ import sys import hy -from hy.importer import ast_compile, import_buffer_to_module -from hy.lex.states import Idle, LexException -from hy.lex.machine import Machine +from hy.lex import LexException, PrematureEndOfInput, tokenize from hy.compiler import hy_compile +from hy.importer import ast_compile, import_buffer_to_module from hy.completer import completion -from hy.macros import macro, require, process +from hy.macros import macro, require from hy.models.expression import HyExpression from hy.models.string import HyString from hy.models.symbol import HySymbol -_machine = Machine(Idle, 1, 0) try: import __builtin__ as builtins @@ -72,27 +70,14 @@ builtins.exit = HyQuitter('exit') class HyREPL(code.InteractiveConsole): def runsource(self, source, filename='', symbol='single'): - global _machine - try: - _machine.process(source + "\n") + tokens = tokenize(source) + except PrematureEndOfInput: + return True except LexException: - _machine = Machine(Idle, 1, 0) self.showsyntaxerror(filename) return False - if type(_machine.state) != Idle: - _machine = Machine(Idle, 1, 0) - return True - - try: - tokens = process(_machine.nodes, "__console__") - except Exception: - _machine = Machine(Idle, 1, 0) - self.showtraceback() - return False - - _machine = Machine(Idle, 1, 0) try: _ast = hy_compile(tokens, "__console__", root=ast.Interactive) code = ast_compile(_ast, filename, symbol) diff --git a/hy/lex/__init__.py b/hy/lex/__init__.py index 8cf257a..651f89a 100644 --- a/hy/lex/__init__.py +++ b/hy/lex/__init__.py @@ -18,16 +18,21 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -from hy.lex.machine import Machine -from hy.lex.states import Idle, LexException +from rply.errors import LexingError + +from hy.lex.exceptions import LexException, PrematureEndOfInput # NOQA +from hy.lex.lexer import lexer +from hy.lex.parser import parser def tokenize(buf): """ Tokenize a Lisp file or string buffer into internal Hy objects. """ - machine = Machine(Idle, 1, 0) - machine.process(buf) - if type(machine.state) != Idle: - raise LexException("Incomplete Lex.") - return machine.nodes + try: + return parser.parse(lexer.lex(buf)) + except LexingError as e: + pos = e.getsourcepos() + raise LexException( + "Could not identify the next token at line %s, column %s" % ( + pos.lineno, pos.colno)) diff --git a/hy/lex/exceptions.py b/hy/lex/exceptions.py new file mode 100644 index 0000000..21b2700 --- /dev/null +++ b/hy/lex/exceptions.py @@ -0,0 +1,31 @@ +# Copyright (c) 2013 Nicolas Dandrimont +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from hy.errors import HyError + + +class LexException(HyError): + """Error during the Lexing of a Hython expression.""" + pass + + +class PrematureEndOfInput(LexException): + """We got a premature end of input""" + pass diff --git a/hy/lex/machine.py b/hy/lex/machine.py deleted file mode 100644 index f7e92d0..0000000 --- a/hy/lex/machine.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2013 Paul Tagliamonte -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -from hy.lex.states import Idle, LexException - - -class Machine(object): - """ - Hy State Machine. This controls all the state hopping we need to do - to properly parse Hy source. - """ - - __slots__ = ("submachine", "nodes", "state", "line", "column", - "start_line", "start_column") - - def __init__(self, state, line, column): - self.nodes = [] - self.line = line - self.column = column - self.submachine = None - self.state = None - self.set_state(state) - - def set_state(self, state): - """ - Set the new internal machine state. This helps keep line annotations - correct, and make sure that we properly call enter and exit. - """ - - if self.state: - self.state._exit() - - self.accept_result(self.state) - - self.state = state(self) - self.state._enter() - - self.start_line = self.line - self.start_column = self.column - - def sub(self, state): - """ - Set up a submachine for this machine. - """ - self.submachine = Machine(state, self.line, self.column) - - def accept_result(self, state): - """ - Accept and annotate the result. - """ - if state and not state.result is None: - result = state.result - - result.start_line, result.end_line = (self.start_line, self.line) - result.start_column, result.end_column = (self.start_column, - self.column) - self.nodes.append(result) - - def process(self, buf): - """ - process an iterable of chars into Hy internal models of the Source. - """ - for char in buf: - - self.column += 1 - if char == "\n": - self.line += 1 - self.column = 0 - - if self.submachine: - self.submachine.process([char]) - if type(self.submachine.state) == Idle: - if len(self.submachine.nodes) > 1: - raise LexException("Funky Submachine stuff") - - nodes = self.submachine.nodes - self.submachine = None - if nodes != []: - self.state.nodes.append(nodes[0]) - continue - - new = self.state.process(char) - if new: - self.set_state(new) diff --git a/hy/lex/states.py b/hy/lex/states.py deleted file mode 100644 index 9c81967..0000000 --- a/hy/lex/states.py +++ /dev/null @@ -1,396 +0,0 @@ -# Copyright (c) 2013 Paul Tagliamonte -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -from hy.models.expression import HyExpression -from hy.models.integer import HyInteger -from hy.models.lambdalist import HyLambdaListKeyword -from hy.models.float import HyFloat -from hy.models.complex import HyComplex -from hy.models.symbol import HySymbol -from hy.models.string import HyString -from hy.models.keyword import HyKeyword -from hy.models.dict import HyDict -from hy.models.list import HyList - -from hy.errors import HyError - -from abc import ABCMeta, abstractmethod - - -WHITESPACE = [" ", "\t", "\n", "\r"] - - -class LexException(HyError): - """ - Error during the Lexing of a Hython expression. - """ - pass - - -def _resolve_atom(obj): - """ - Resolve a bare atom into one of the following (in order): - - - Integer - - LambdaListKeyword - - Float - - Complex - - Symbol - """ - try: - return HyInteger(obj) - except ValueError: - pass - - if obj.startswith("&"): - return HyLambdaListKeyword(obj) - - try: - return HyFloat(obj) - except ValueError: - pass - - if obj != "j": - try: - return HyComplex(obj) - except ValueError: - pass - - table = { - "true": "True", - "false": "False", - "null": "None", - } - - if obj in table: - return HySymbol(table[obj]) - - if obj.startswith(":"): - return HyKeyword(obj) - - if obj.startswith("*") and obj.endswith("*") and obj not in ("*", "**"): - obj = obj[1:-1].upper() - - if "-" in obj and obj != "-": - obj = obj.replace("-", "_") - - return HySymbol(obj) - - -class State(object): - """ - Generic State model. - """ - - __slots__ = ("nodes", "machine") - __metaclass__ = ABCMeta - - def __init__(self, machine): - self.machine = machine - - def _enter(self): - """ Internal shim for running global ``enter`` code """ - self.result = None - self.nodes = [] - self.enter() - - def _exit(self): - """ Internal shim for running global ``exit`` code """ - self.exit() - - def enter(self): - """ - Overridable ``enter`` routines. Subclasses may implement this. - """ - pass - - def exit(self): - """ - Overridable ``exit`` routines. Subclasses may implement this. - """ - pass - - @abstractmethod - def process(self, char): - """ - Overridable ``process`` routines. Subclasses must implement this to be - useful. - """ - pass # ABC - - -class ListeyThing(State): - - def enter(self): - self.buf = "" - - def commit(self): - if self.buf != "": - ret = _resolve_atom(self.buf) - ret.start_line = self._start_line - ret.start_column = self._start_column - ret.end_line = self.machine.line - ret.end_column = (self.machine.column - 1) - - self.nodes.append(ret) - self.buf = "" - - def exit(self): - self.commit() - self.result = self.result_type(self.nodes) - - def process(self, char): - if char == "(": - self.commit() - self.machine.sub(Expression) - return - - if char == "{": - self.commit() - self.machine.sub(Dict) - return - - if char == "[": - self.commit() - self.machine.sub(List) - return - - if char == "\"": - self.commit() - self.machine.sub(String) - return - - if char == ";": - self.commit() - self.machine.sub(Comment) - return - - if char == self.end_char: - return Idle - - if char in ")]}": - raise LexException("Unexpected closing character: `%s'" % (char)) - - if char in WHITESPACE: - self.commit() - return - - if self.buf == "": - self._start_line = self.machine.line - self._start_column = self.machine.column - - self.buf += char - - -class List(ListeyThing): - """ - This state parses a Hy list (like a Clojure vector) for use in native - Python interop. - - [foo 1 2 3 4] is a good example. - """ - - result_type = HyList - end_char = "]" - - -class Expression(ListeyThing): - """ - This state parses a Hy expression (statement, to be evaluated at runtime) - for running things & stuff. - """ - - result_type = HyExpression - end_char = ")" - - -class Dict(ListeyThing): - """ - This state parses a Hy dict for things. - """ - - def exit(self): - self.commit() - self.result = HyDict(self.nodes) - - end_char = "}" - - -class String(State): - """ - String state. This will handle stuff like: - - (println "foobar") - ^^^^^^^^ -- String - """ - - def enter(self): - self.escaped = False - - def exit(self): - self.result = HyString("".join(self.nodes)) - - def process(self, char): - """ - State transitions: - - - " - Idle - """ - if self.escaped: - self.escaped = False - simple_escapables = tuple('abfnrtv') - if char in simple_escapables: - self.nodes.append(eval('"\\'+char+'"')) - return - if char == "\\": - self.nodes.append("\\") - return - if char == "\"": - self.nodes.append("\"") - return - - raise LexException("Unknown modifier: `%s'" % (char)) - - if char == "\"": - return Idle - - if char == "\\": - self.escaped = True - return - - self.nodes.append(char) - - -class Atom(State): - """ - This state parses integer constants, boolean constants, and symbols - """ - - def __init__(self, machine): - State.__init__(self, machine) - self.initial_buf = '' - - def enter(self): - self.buf = self.initial_buf - - def exit(self): - self.result = _resolve_atom(self.buf) - - def process(self, char): - """ - State transitions: - - - WHITESPACE - Idle - - ; - Comment - """ - - if char in WHITESPACE: - return Idle - - if char == ";": - return Comment - - self.buf += char - - -def AtomStartingWith(initial_char): - def AtomFactory(machine): - state = Atom(machine) - state.initial_buf = initial_char - return state - return AtomFactory - - -class Idle(State): - """ - Idle state. This is the first (and last) thing that we should - be in. - """ - - def process(self, char): - """ - State transitions: - - - ( - Expression - - [ - List - - { - Dict - - \" - String - - ; - Comment - - # - Hash - - (default) - Atom - """ - - if char == "(": - return Expression - - if char == "[": - return List - - if char == "{": - return Dict - - if char == "\"": - return String - - if char == ";": - return Comment - - if char == "#": - return Hash - - if char in WHITESPACE: - return - - return AtomStartingWith(char) - - -class Comment(State): - """ - Comment state. - """ - - def process(self, char): - """ - State transitions: - - - \n - Idle - - (default) - disregard. - """ - - if char == "\n": - return Idle - - -class Hash(State): - """ - Hash state - """ - - def process(self, char): - """ - State transitions: - - - ! - Comment - """ - - if char == "!": - return Comment - - raise LexException("Unknown char (Hash state): `%s'" % (char)) diff --git a/tests/lex/test_lex.py b/tests/lex/test_lex.py index 7b90750..359999b 100644 --- a/tests/lex/test_lex.py +++ b/tests/lex/test_lex.py @@ -27,9 +27,7 @@ from hy.models.symbol import HySymbol from hy.models.string import HyString from hy.models.dict import HyDict -from hy.lex.states import LexException - -from hy.lex import tokenize +from hy.lex import LexException, PrematureEndOfInput, tokenize def test_lex_exception(): @@ -37,13 +35,17 @@ def test_lex_exception(): try: tokenize("(foo") assert True is False - except LexException: + except PrematureEndOfInput: pass - try: - tokenize("&foo&") + tokenize("{foo bar") assert True is False - except LexException: + except PrematureEndOfInput: + pass + try: + tokenize("(defn foo [bar]") + assert True is False + except PrematureEndOfInput: pass @@ -124,6 +126,8 @@ def test_lex_expression_complex(): assert objs == [HyExpression([HySymbol("foo"), HyComplex(-0.5j)])] objs = tokenize("(foo 1.e7j)") assert objs == [HyExpression([HySymbol("foo"), HyComplex(1.e7j)])] + objs = tokenize("(foo j)") + assert objs == [HyExpression([HySymbol("foo"), HySymbol("j")])] def test_lex_line_counting(): @@ -222,11 +226,17 @@ def test_escapes(): entry = tokenize("(foo \"foo\\n\")")[0] assert entry[1] == "foo\n" - try: - entry = tokenize("(foo \"foo\s\")")[0] - assert True is False - except LexException: - pass + entry = tokenize("(foo \"foo\s\")")[0] + assert entry[1] == "foo\\s" + + +def test_unicode_escapes(): + """Ensure unicode escapes are handled correctly""" + s = r'"a\xac\u1234\u20ac\U00008000"' + assert len(s) == 29 + entry = tokenize(s)[0] + assert len(entry) == 5 + assert [ord(x) for x in entry] == [97, 172, 4660, 8364, 32768] def test_hashbang(): diff --git a/tests/test_bin.py b/tests/test_bin.py index 3a4849c..f62957e 100644 --- a/tests/test_bin.py +++ b/tests/test_bin.py @@ -63,7 +63,7 @@ def test_bin_hy_cmd(): ret = run_cmd("hy -c \"(koan\"") assert ret[0] == 1 - assert "LexException" in ret[1] + assert "PrematureEndOfInput" in ret[1] def test_bin_hy_icmd():