Wire the rply parser
Amend the tests to account for the changes
This commit is contained in:
parent
2ed299676f
commit
81af09d002
@ -31,18 +31,16 @@ import sys
|
||||
|
||||
import hy
|
||||
|
||||
from hy.importer import ast_compile, import_buffer_to_module
|
||||
from hy.lex.states import Idle, LexException
|
||||
from hy.lex.machine import Machine
|
||||
from hy.lex import LexException, PrematureEndOfInput, tokenize
|
||||
from hy.compiler import hy_compile
|
||||
from hy.importer import ast_compile, import_buffer_to_module
|
||||
from hy.completer import completion
|
||||
|
||||
from hy.macros import macro, require, process
|
||||
from hy.macros import macro, require
|
||||
from hy.models.expression import HyExpression
|
||||
from hy.models.string import HyString
|
||||
from hy.models.symbol import HySymbol
|
||||
|
||||
_machine = Machine(Idle, 1, 0)
|
||||
|
||||
try:
|
||||
import __builtin__ as builtins
|
||||
@ -72,27 +70,14 @@ builtins.exit = HyQuitter('exit')
|
||||
|
||||
class HyREPL(code.InteractiveConsole):
|
||||
def runsource(self, source, filename='<input>', symbol='single'):
|
||||
global _machine
|
||||
|
||||
try:
|
||||
_machine.process(source + "\n")
|
||||
tokens = tokenize(source)
|
||||
except PrematureEndOfInput:
|
||||
return True
|
||||
except LexException:
|
||||
_machine = Machine(Idle, 1, 0)
|
||||
self.showsyntaxerror(filename)
|
||||
return False
|
||||
|
||||
if type(_machine.state) != Idle:
|
||||
_machine = Machine(Idle, 1, 0)
|
||||
return True
|
||||
|
||||
try:
|
||||
tokens = process(_machine.nodes, "__console__")
|
||||
except Exception:
|
||||
_machine = Machine(Idle, 1, 0)
|
||||
self.showtraceback()
|
||||
return False
|
||||
|
||||
_machine = Machine(Idle, 1, 0)
|
||||
try:
|
||||
_ast = hy_compile(tokens, "__console__", root=ast.Interactive)
|
||||
code = ast_compile(_ast, filename, symbol)
|
||||
|
@ -18,16 +18,21 @@
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from hy.lex.machine import Machine
|
||||
from hy.lex.states import Idle, LexException
|
||||
from rply.errors import LexingError
|
||||
|
||||
from hy.lex.exceptions import LexException, PrematureEndOfInput # NOQA
|
||||
from hy.lex.lexer import lexer
|
||||
from hy.lex.parser import parser
|
||||
|
||||
|
||||
def tokenize(buf):
|
||||
"""
|
||||
Tokenize a Lisp file or string buffer into internal Hy objects.
|
||||
"""
|
||||
machine = Machine(Idle, 1, 0)
|
||||
machine.process(buf)
|
||||
if type(machine.state) != Idle:
|
||||
raise LexException("Incomplete Lex.")
|
||||
return machine.nodes
|
||||
try:
|
||||
return parser.parse(lexer.lex(buf))
|
||||
except LexingError as e:
|
||||
pos = e.getsourcepos()
|
||||
raise LexException(
|
||||
"Could not identify the next token at line %s, column %s" % (
|
||||
pos.lineno, pos.colno))
|
||||
|
31
hy/lex/exceptions.py
Normal file
31
hy/lex/exceptions.py
Normal file
@ -0,0 +1,31 @@
|
||||
# Copyright (c) 2013 Nicolas Dandrimont <nicolas.dandrimont@crans.org>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from hy.errors import HyError
|
||||
|
||||
|
||||
class LexException(HyError):
    """
    Error during the Lexing of a Hython expression.

    Base class of the lexer errors defined in this module.
    """
|
||||
|
||||
|
||||
class PrematureEndOfInput(LexException):
    """
    We got a premature end of input.

    Being a ``LexException`` subclass, it is also caught by handlers written
    for the more general error.
    """
|
@ -1,101 +0,0 @@
|
||||
# Copyright (c) 2013 Paul Tagliamonte <paultag@debian.org>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from hy.lex.states import Idle, LexException
|
||||
|
||||
|
||||
class Machine(object):
    """
    Hy State Machine. This controls all the state hopping we need to do
    to properly parse Hy source.

    A machine holds the current state object, the list of finished nodes
    and a running line/column position.  A state may request a nested
    machine through ``sub``; while one is active, characters are routed to
    it instead of to the current state.
    """

    __slots__ = ("submachine", "nodes", "state", "line", "column",
                 "start_line", "start_column")

    def __init__(self, state, line, column):
        """
        Start a machine at ``line``/``column`` and enter ``state``.

        ``state`` is called with the machine as its only argument and must
        return the state object to run.
        """
        self.nodes = []
        self.line = line
        self.column = column
        self.submachine = None
        self.state = None
        self.set_state(state)

    def set_state(self, state):
        """
        Set the new internal machine state. This helps keep line annotations
        correct, and make sure that we properly call enter and exit.
        """
        if self.state:
            self.state._exit()

        # Harvest whatever the outgoing state produced before replacing it.
        self.accept_result(self.state)

        self.state = state(self)
        self.state._enter()

        # The new state's result will be annotated as starting here.
        self.start_line = self.line
        self.start_column = self.column

    def sub(self, state):
        """
        Set up a submachine for this machine, starting at the current
        position in ``state``.
        """
        self.submachine = Machine(state, self.line, self.column)

    def accept_result(self, state):
        """
        Accept and annotate the result.

        Does nothing when ``state`` is falsy or its ``result`` is ``None``;
        otherwise the result gets start/end line and column attributes and
        is appended to ``self.nodes``.
        """
        if state and state.result is not None:
            result = state.result

            result.start_line, result.end_line = (self.start_line, self.line)
            result.start_column, result.end_column = (self.start_column,
                                                      self.column)
            self.nodes.append(result)

    def process(self, buf):
        """
        process an iterable of chars into Hy internal models of the Source.

        Raises ``LexException`` if a submachine finishes with more than one
        node.
        """
        for char in buf:

            self.column += 1
            if char == "\n":
                self.line += 1
                self.column = 0

            if self.submachine:
                self.submachine.process([char])
                # Exact type comparison, as in the rest of the lexer.
                if type(self.submachine.state) is Idle:
                    if len(self.submachine.nodes) > 1:
                        raise LexException("Funky Submachine stuff")

                    nodes = self.submachine.nodes
                    self.submachine = None
                    if nodes:
                        self.state.nodes.append(nodes[0])
                # The character was consumed by the submachine.
                continue

            new = self.state.process(char)
            if new:
                self.set_state(new)
|
396
hy/lex/states.py
396
hy/lex/states.py
@ -1,396 +0,0 @@
|
||||
# Copyright (c) 2013 Paul Tagliamonte <paultag@debian.org>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from hy.models.expression import HyExpression
|
||||
from hy.models.integer import HyInteger
|
||||
from hy.models.lambdalist import HyLambdaListKeyword
|
||||
from hy.models.float import HyFloat
|
||||
from hy.models.complex import HyComplex
|
||||
from hy.models.symbol import HySymbol
|
||||
from hy.models.string import HyString
|
||||
from hy.models.keyword import HyKeyword
|
||||
from hy.models.dict import HyDict
|
||||
from hy.models.list import HyList
|
||||
|
||||
from hy.errors import HyError
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
|
||||
WHITESPACE = [" ", "\t", "\n", "\r"]
|
||||
|
||||
|
||||
class LexException(HyError):
    """Error during the Lexing of a Hython expression."""
|
||||
|
||||
|
||||
def _resolve_atom(obj):
    """
    Turn the text of a bare atom into a Hy model.

    The candidates are tried in this order and the first match wins:

    - Integer
    - LambdaListKeyword (text starting with ``&``)
    - Float
    - Complex (never attempted for the lone text ``j``)
    - Symbol for ``true`` / ``false`` / ``null``
    - Keyword (text starting with ``:``)
    - Symbol, after rewriting ``*foo*`` to ``FOO`` and ``-`` to ``_``
    """

    def _attempt(model):
        # A model signals "not my kind of literal" with ValueError.
        try:
            return model(obj)
        except ValueError:
            return None

    # Compare against None explicitly: a numeric model may be falsy.
    as_integer = _attempt(HyInteger)
    if as_integer is not None:
        return as_integer

    if obj.startswith("&"):
        return HyLambdaListKeyword(obj)

    as_float = _attempt(HyFloat)
    if as_float is not None:
        return as_float

    if obj != "j":
        as_complex = _attempt(HyComplex)
        if as_complex is not None:
            return as_complex

    constants = {
        "true": "True",
        "false": "False",
        "null": "None",
    }
    if obj in constants:
        return HySymbol(constants[obj])

    if obj.startswith(":"):
        return HyKeyword(obj)

    earmuffed = obj.startswith("*") and obj.endswith("*")
    if earmuffed and obj not in ("*", "**"):
        obj = obj[1:-1].upper()

    if obj != "-" and "-" in obj:
        obj = obj.replace("-", "_")

    return HySymbol(obj)
|
||||
|
||||
|
||||
class State(object):
    """
    Generic State model.

    A state is created with the machine that runs it.  The machine calls
    ``_enter`` when the state becomes current, ``process`` for each
    character, and ``_exit`` when the state is replaced.
    """

    # ``result`` is listed because ``_enter`` assigns it; without the slot
    # the assignment fails on any instance that has no ``__dict__``.
    __slots__ = ("nodes", "machine", "result")
    __metaclass__ = ABCMeta  # Python 2 style metaclass declaration

    def __init__(self, machine):
        """Remember the ``machine`` this state belongs to."""
        self.machine = machine

    def _enter(self):
        """ Internal shim for running global ``enter`` code """
        # Reset the per-run data before handing over to the subclass hook.
        self.result = None
        self.nodes = []
        self.enter()

    def _exit(self):
        """ Internal shim for running global ``exit`` code """
        self.exit()

    def enter(self):
        """
        Overridable ``enter`` routines. Subclasses may implement this.
        """
        pass

    def exit(self):
        """
        Overridable ``exit`` routines. Subclasses may implement this.
        """
        pass

    @abstractmethod
    def process(self, char):
        """
        Overridable ``process`` routines. Subclasses must implement this to be
        useful.
        """
        pass  # ABC
|
||||
|
||||
|
||||
class ListeyThing(State):
    """
    Common behaviour of the bracketed container states.

    Reads ``end_char`` (the closing character) and, in ``exit``,
    ``result_type`` (called with the collected nodes) from the subclass.
    """

    def enter(self):
        """Start with an empty atom buffer."""
        self.buf = ""

    def commit(self):
        """
        Turn the buffered text, if any, into a node annotated with its
        position, then clear the buffer.
        """
        if self.buf == "":
            return

        atom = _resolve_atom(self.buf)
        atom.start_line = self._start_line
        atom.start_column = self._start_column
        atom.end_line = self.machine.line
        atom.end_column = (self.machine.column - 1)

        self.nodes.append(atom)
        self.buf = ""

    def exit(self):
        """Flush the pending atom and build the result from the nodes."""
        self.commit()
        self.result = self.result_type(self.nodes)

    def process(self, char):
        """
        Handle one character.

        Returns ``Idle`` on the closing character and ``None`` otherwise;
        raises ``LexException`` on a closing character that is not ours.
        """
        # Characters that open a nested form handled by a submachine.
        nested = {
            "(": Expression,
            "{": Dict,
            "[": List,
            "\"": String,
            ";": Comment,
        }.get(char)

        if nested is not None:
            self.commit()
            self.machine.sub(nested)
            return

        if char == self.end_char:
            return Idle

        if char in ")]}":
            raise LexException("Unexpected closing character: `%s'" % (char))

        if char in WHITESPACE:
            self.commit()
            return

        if self.buf == "":
            # First character of a new atom: remember where it starts.
            self._start_line = self.machine.line
            self._start_column = self.machine.column

        self.buf += char
|
||||
|
||||
|
||||
class List(ListeyThing):
    """
    State for a Hy list (like a Clojure vector), used for native Python
    interop.

    Example: [foo 1 2 3 4]
    """

    end_char = "]"
    result_type = HyList
|
||||
|
||||
|
||||
class Expression(ListeyThing):
    """
    State for a Hy expression (statement, to be evaluated at runtime),
    i.e. a parenthesised form.
    """

    end_char = ")"
    result_type = HyExpression
|
||||
|
||||
|
||||
class Dict(ListeyThing):
    """
    This state parses a Hy dict for things.

    The result is built by the inherited ``exit``, which commits the
    pending atom and calls ``result_type`` with the collected nodes.
    """

    # Declared like the sibling container states instead of duplicating the
    # inherited ``exit`` logic.
    result_type = HyDict
    end_char = "}"
|
||||
|
||||
|
||||
class String(State):
    """
    String state. This will handle stuff like:

        (println "foobar")
                 ^^^^^^^^ -- String
    """

    # Characters accepted after a backslash and the text each one stands
    # for.  A static table replaces the former ``eval`` of a synthesized
    # string literal.
    _ESCAPES = {
        "a": "\a",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        "\"": "\"",
    }

    def enter(self):
        """Start outside of any escape sequence."""
        self.escaped = False

    def exit(self):
        """Join the collected characters into the resulting string model."""
        self.result = HyString("".join(self.nodes))

    def process(self, char):
        """
        State transitions:

            - " - Idle

        Raises ``LexException`` when a backslash is followed by a character
        that is not a known escape.
        """
        if self.escaped:
            self.escaped = False
            if char in self._ESCAPES:
                self.nodes.append(self._ESCAPES[char])
                return

            raise LexException("Unknown modifier: `%s'" % (char))

        if char == "\"":
            return Idle

        if char == "\\":
            # Defer the decision to the next character.
            self.escaped = True
            return

        self.nodes.append(char)
|
||||
|
||||
|
||||
class Atom(State):
    """
    State that accumulates the text of a bare atom and resolves it with
    ``_resolve_atom`` on exit.
    """

    def __init__(self, machine):
        """Create the state with an empty initial buffer."""
        super(Atom, self).__init__(machine)
        self.initial_buf = ''

    def enter(self):
        """Seed the working buffer from ``initial_buf``."""
        self.buf = self.initial_buf

    def exit(self):
        """Resolve the accumulated text into the result."""
        self.result = _resolve_atom(self.buf)

    def process(self, char):
        """
        State transitions:

            - WHITESPACE - Idle
            - ; - Comment

        Any other character is appended to the buffer.
        """
        if char == ";":
            return Comment

        if char in WHITESPACE:
            return Idle

        self.buf = self.buf + char
|
||||
|
||||
|
||||
def AtomStartingWith(initial_char):
    """
    Return a state factory that builds an ``Atom`` whose buffer is
    pre-seeded with ``initial_char``.
    """
    def AtomFactory(machine):
        atom = Atom(machine)
        # Picked up by Atom.enter() as the starting buffer content.
        atom.initial_buf = initial_char
        return atom

    return AtomFactory
|
||||
|
||||
|
||||
class Idle(State):
    """
    Idle state. This is the first (and last) thing that we should
    be in.
    """

    def process(self, char):
        """
        State transitions:

            - ( - Expression
            - [ - List
            - { - Dict
            - double quote - String
            - ; - Comment
            - # - Hash
            - whitespace - stay idle
            - (default) - Atom starting with the character
        """
        if char in WHITESPACE:
            return

        # Built per call: Comment and Hash are defined after this class.
        transitions = {
            "(": Expression,
            "[": List,
            "{": Dict,
            "\"": String,
            ";": Comment,
            "#": Hash,
        }
        if char in transitions:
            return transitions[char]

        return AtomStartingWith(char)
|
||||
|
||||
|
||||
class Comment(State):
    """
    Comment state: ignores everything up to the end of the line.
    """

    def process(self, char):
        """
        State transitions:

            - newline - Idle
            - (default) - disregard.
        """
        return Idle if char == "\n" else None
|
||||
|
||||
|
||||
class Hash(State):
    """
    Hash state, entered after a ``#``.
    """

    def process(self, char):
        """
        State transitions:

            - ! - Comment

        Any other character raises ``LexException``.
        """
        if char != "!":
            raise LexException("Unknown char (Hash state): `%s'" % (char))

        return Comment
|
@ -27,9 +27,7 @@ from hy.models.symbol import HySymbol
|
||||
from hy.models.string import HyString
|
||||
from hy.models.dict import HyDict
|
||||
|
||||
from hy.lex.states import LexException
|
||||
|
||||
from hy.lex import tokenize
|
||||
from hy.lex import LexException, PrematureEndOfInput, tokenize
|
||||
|
||||
|
||||
def test_lex_exception():
|
||||
@ -37,13 +35,17 @@ def test_lex_exception():
|
||||
try:
|
||||
tokenize("(foo")
|
||||
assert True is False
|
||||
except LexException:
|
||||
except PrematureEndOfInput:
|
||||
pass
|
||||
|
||||
try:
|
||||
tokenize("&foo&")
|
||||
tokenize("{foo bar")
|
||||
assert True is False
|
||||
except LexException:
|
||||
except PrematureEndOfInput:
|
||||
pass
|
||||
try:
|
||||
tokenize("(defn foo [bar]")
|
||||
assert True is False
|
||||
except PrematureEndOfInput:
|
||||
pass
|
||||
|
||||
|
||||
@ -124,6 +126,8 @@ def test_lex_expression_complex():
|
||||
assert objs == [HyExpression([HySymbol("foo"), HyComplex(-0.5j)])]
|
||||
objs = tokenize("(foo 1.e7j)")
|
||||
assert objs == [HyExpression([HySymbol("foo"), HyComplex(1.e7j)])]
|
||||
objs = tokenize("(foo j)")
|
||||
assert objs == [HyExpression([HySymbol("foo"), HySymbol("j")])]
|
||||
|
||||
|
||||
def test_lex_line_counting():
|
||||
@ -222,11 +226,17 @@ def test_escapes():
|
||||
entry = tokenize("(foo \"foo\\n\")")[0]
|
||||
assert entry[1] == "foo\n"
|
||||
|
||||
try:
|
||||
entry = tokenize("(foo \"foo\s\")")[0]
|
||||
assert True is False
|
||||
except LexException:
|
||||
pass
|
||||
entry = tokenize("(foo \"foo\s\")")[0]
|
||||
assert entry[1] == "foo\\s"
|
||||
|
||||
|
||||
def test_unicode_escapes():
    """Ensure unicode escapes are handled correctly"""
    # Raw literal: the backslashes reach the tokenizer untouched.
    source = r'"a\xac\u1234\u20ac\U00008000"'
    assert len(source) == 29

    parsed = tokenize(source)[0]
    assert len(parsed) == 5

    code_points = list(map(ord, parsed))
    assert code_points == [97, 172, 4660, 8364, 32768]
|
||||
|
||||
|
||||
def test_hashbang():
|
||||
|
@ -63,7 +63,7 @@ def test_bin_hy_cmd():
|
||||
|
||||
ret = run_cmd("hy -c \"(koan\"")
|
||||
assert ret[0] == 1
|
||||
assert "LexException" in ret[1]
|
||||
assert "PrematureEndOfInput" in ret[1]
|
||||
|
||||
|
||||
def test_bin_hy_icmd():
|
||||
|
Loading…
Reference in New Issue
Block a user