Wire the rply parser

Amend the tests to account for the changes
This commit is contained in:
Nicolas Dandrimont 2013-04-12 17:30:13 +02:00
parent 2ed299676f
commit 81af09d002
7 changed files with 72 additions and 538 deletions

View File

@ -31,18 +31,16 @@ import sys
import hy import hy
from hy.importer import ast_compile, import_buffer_to_module from hy.lex import LexException, PrematureEndOfInput, tokenize
from hy.lex.states import Idle, LexException
from hy.lex.machine import Machine
from hy.compiler import hy_compile from hy.compiler import hy_compile
from hy.importer import ast_compile, import_buffer_to_module
from hy.completer import completion from hy.completer import completion
from hy.macros import macro, require, process from hy.macros import macro, require
from hy.models.expression import HyExpression from hy.models.expression import HyExpression
from hy.models.string import HyString from hy.models.string import HyString
from hy.models.symbol import HySymbol from hy.models.symbol import HySymbol
_machine = Machine(Idle, 1, 0)
try: try:
import __builtin__ as builtins import __builtin__ as builtins
@ -72,27 +70,14 @@ builtins.exit = HyQuitter('exit')
class HyREPL(code.InteractiveConsole): class HyREPL(code.InteractiveConsole):
def runsource(self, source, filename='<input>', symbol='single'): def runsource(self, source, filename='<input>', symbol='single'):
global _machine
try: try:
_machine.process(source + "\n") tokens = tokenize(source)
except PrematureEndOfInput:
return True
except LexException: except LexException:
_machine = Machine(Idle, 1, 0)
self.showsyntaxerror(filename) self.showsyntaxerror(filename)
return False return False
if type(_machine.state) != Idle:
_machine = Machine(Idle, 1, 0)
return True
try:
tokens = process(_machine.nodes, "__console__")
except Exception:
_machine = Machine(Idle, 1, 0)
self.showtraceback()
return False
_machine = Machine(Idle, 1, 0)
try: try:
_ast = hy_compile(tokens, "__console__", root=ast.Interactive) _ast = hy_compile(tokens, "__console__", root=ast.Interactive)
code = ast_compile(_ast, filename, symbol) code = ast_compile(_ast, filename, symbol)

View File

@ -18,16 +18,21 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE. # DEALINGS IN THE SOFTWARE.
from hy.lex.machine import Machine from rply.errors import LexingError
from hy.lex.states import Idle, LexException
from hy.lex.exceptions import LexException, PrematureEndOfInput # NOQA
from hy.lex.lexer import lexer
from hy.lex.parser import parser
def tokenize(buf): def tokenize(buf):
""" """
Tokenize a Lisp file or string buffer into internal Hy objects. Tokenize a Lisp file or string buffer into internal Hy objects.
""" """
machine = Machine(Idle, 1, 0) try:
machine.process(buf) return parser.parse(lexer.lex(buf))
if type(machine.state) != Idle: except LexingError as e:
raise LexException("Incomplete Lex.") pos = e.getsourcepos()
return machine.nodes raise LexException(
"Could not identify the next token at line %s, column %s" % (
pos.lineno, pos.colno))

31
hy/lex/exceptions.py Normal file
View File

@ -0,0 +1,31 @@
# Copyright (c) 2013 Nicolas Dandrimont <nicolas.dandrimont@crans.org>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from hy.errors import HyError
class LexException(HyError):
    """Raised when lexing a Hy expression fails."""
class PrematureEndOfInput(LexException):
    """Lexing error signalling that the input ended prematurely."""

View File

@ -1,101 +0,0 @@
# Copyright (c) 2013 Paul Tagliamonte <paultag@debian.org>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from hy.lex.states import Idle, LexException
class Machine(object):
    """
    Hy state machine.

    Feeds characters one at a time to the current state object, switches
    states when a state asks for it, and collects the annotated results of
    finished states in ``nodes``.
    """

    __slots__ = ("submachine", "nodes", "state", "line", "column",
                 "start_line", "start_column")

    def __init__(self, state, line, column):
        """
        Create a machine positioned at ``line``/``column``.

        ``state`` is a callable taking the machine and returning the initial
        state object (normally a state class).
        """
        self.nodes = []
        self.line = line
        self.column = column
        self.submachine = None
        self.state = None
        self.set_state(state)

    def set_state(self, state):
        """
        Switch to a new state.

        The outgoing state (if any) is exited and its result recorded, then
        the new state is built and entered, and the current position is
        remembered as the start of whatever the new state will produce.
        """
        if self.state:
            self.state._exit()
            self.accept_result(self.state)

        self.state = state(self)
        self.state._enter()

        self.start_line = self.line
        self.start_column = self.column

    def sub(self, state):
        """
        Start a submachine in ``state`` at the current line and column.
        """
        self.submachine = Machine(state, self.line, self.column)

    def accept_result(self, state):
        """
        Annotate the result of ``state`` with its source span and store it.

        States whose ``result`` is None contribute nothing.
        """
        if state and state.result is not None:
            result = state.result
            result.start_line, result.end_line = (self.start_line, self.line)
            result.start_column, result.end_column = (self.start_column,
                                                      self.column)
            self.nodes.append(result)

    def process(self, buf):
        """
        Process an iterable of characters into Hy internal models.

        Raises LexException if a finished submachine produced more than one
        node.
        """
        for char in buf:
            # Track the position before dispatching the character.
            self.column += 1
            if char == "\n":
                self.line += 1
                self.column = 0

            if self.submachine:
                # While a submachine is active it consumes every character.
                self.submachine.process([char])
                if isinstance(self.submachine.state, Idle):
                    # The submachine is back to Idle: harvest its result.
                    if len(self.submachine.nodes) > 1:
                        raise LexException("Funky Submachine stuff")
                    nodes = self.submachine.nodes
                    self.submachine = None
                    if nodes:
                        self.state.nodes.append(nodes[0])
                continue

            new = self.state.process(char)
            if new:
                self.set_state(new)

View File

@ -1,396 +0,0 @@
# Copyright (c) 2013 Paul Tagliamonte <paultag@debian.org>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from hy.models.expression import HyExpression
from hy.models.integer import HyInteger
from hy.models.lambdalist import HyLambdaListKeyword
from hy.models.float import HyFloat
from hy.models.complex import HyComplex
from hy.models.symbol import HySymbol
from hy.models.string import HyString
from hy.models.keyword import HyKeyword
from hy.models.dict import HyDict
from hy.models.list import HyList
from hy.errors import HyError
from abc import ABCMeta, abstractmethod
WHITESPACE = [" ", "\t", "\n", "\r"]
class LexException(HyError):
    """Raised when lexing a Hy expression fails."""
def _resolve_atom(obj):
    """
    Turn the text of a bare atom into a Hy model.

    Candidates are tried in this order, the first match winning:
      - HyInteger
      - HyLambdaListKeyword (text starting with ``&``)
      - HyFloat
      - HyComplex (never attempted for the bare text ``j``)
      - the ``true`` / ``false`` / ``null`` aliases
      - HyKeyword (text starting with ``:``)
      - HySymbol, after ``*earmuff*`` and dash rewriting
    """
    try:
        return HyInteger(obj)
    except ValueError:
        pass

    if obj.startswith("&"):
        return HyLambdaListKeyword(obj)

    # HyComplex is skipped for the bare text "j".
    numeric_models = (HyFloat,) if obj == "j" else (HyFloat, HyComplex)
    for model in numeric_models:
        try:
            return model(obj)
        except ValueError:
            continue

    aliases = {
        "true": "True",
        "false": "False",
        "null": "None",
    }
    alias = aliases.get(obj)
    if alias is not None:
        return HySymbol(alias)

    if obj.startswith(":"):
        return HyKeyword(obj)

    name = obj
    earmuffed = name.startswith("*") and name.endswith("*")
    if earmuffed and name not in ("*", "**"):
        # *foo* becomes FOO
        name = name[1:-1].upper()

    if name != "-" and "-" in name:
        name = name.replace("-", "_")

    return HySymbol(name)
class State(object):
    """
    Generic State model.

    A state receives characters through ``process`` and may return the next
    state to switch to. ``nodes`` accumulates intermediate pieces and
    ``result`` holds the finished value (or None).
    """

    # "result" must be listed: _enter() assigns it, and an instance that has
    # only slots (no __dict__) would otherwise raise AttributeError.
    __slots__ = ("nodes", "machine", "result")
    # NOTE: ``__metaclass__`` is only honoured by Python 2; on Python 3 the
    # abstract-method check below is not enforced.
    __metaclass__ = ABCMeta

    def __init__(self, machine):
        """Remember the machine that owns this state."""
        self.machine = machine

    def _enter(self):
        """ Internal shim for running global ``enter`` code """
        self.result = None
        self.nodes = []
        self.enter()

    def _exit(self):
        """ Internal shim for running global ``exit`` code """
        self.exit()

    def enter(self):
        """
        Overridable ``enter`` routines. Subclasses may implement this.
        """
        pass

    def exit(self):
        """
        Overridable ``exit`` routines. Subclasses may implement this.
        """
        pass

    @abstractmethod
    def process(self, char):
        """
        Overridable ``process`` routines. Subclasses must implement this to be
        useful.
        """
        pass  # ABC
class ListeyThing(State):
    """
    Common behaviour of the bracketed container states.

    Atoms are buffered character by character and flushed into ``nodes``;
    nested containers, strings and comments are delegated to a submachine.
    ``end_char`` and ``result_type`` are read from ``self`` and are not
    defined on this class.
    """

    def enter(self):
        self.buf = ""

    def commit(self):
        """Resolve the buffered atom, if any, annotate it and store it."""
        if self.buf == "":
            return

        atom = _resolve_atom(self.buf)
        atom.start_line = self._start_line
        atom.start_column = self._start_column
        atom.end_line = self.machine.line
        atom.end_column = (self.machine.column - 1)
        self.nodes.append(atom)
        self.buf = ""

    def exit(self):
        self.commit()
        self.result = self.result_type(self.nodes)

    def process(self, char):
        """
        Consume one character.

        Returns Idle on the closing character and None otherwise; raises
        LexException on a closing character that does not match.
        """
        # Built per call: several of these classes are defined after this one.
        openers = {
            "(": Expression,
            "{": Dict,
            "[": List,
            "\"": String,
            ";": Comment,
        }
        if char in openers:
            self.commit()
            self.machine.sub(openers[char])
            return None

        if char == self.end_char:
            return Idle

        if char in ")]}":
            raise LexException("Unexpected closing character: `%s'" % (char))

        if char in WHITESPACE:
            self.commit()
            return None

        if self.buf == "":
            # First character of a new atom: remember where it starts.
            self._start_line = self.machine.line
            self._start_column = self.machine.column

        self.buf += char
        return None
class List(ListeyThing):
    """
    State for a Hy list (similar to a Clojure vector), used for native
    Python interop.

    Example: [foo 1 2 3 4]
    """

    end_char = "]"
    result_type = HyList
class Expression(ListeyThing):
    """
    State for a Hy expression: a parenthesised form that is evaluated at
    runtime.
    """

    end_char = ")"
    result_type = HyExpression
class Dict(ListeyThing):
    """
    State for a Hy dict literal.

    The inherited ``exit`` commits any pending atom and wraps the collected
    nodes in ``result_type``, i.e. a HyDict.
    """

    result_type = HyDict
    end_char = "}"
class String(State):
    """
    String state. This will handle stuff like:

        (println "foobar")
                 ^^^^^^^^ -- String
    """

    # Characters allowed after a backslash and the text they stand for.
    # A static table replaces the former eval() of a generated literal.
    _ESCAPES = {
        "a": "\a",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        "\"": "\"",
    }

    def enter(self):
        self.escaped = False

    def exit(self):
        self.result = HyString("".join(self.nodes))

    def process(self, char):
        """
        Consume one character of the string body.

        Returns Idle on the closing double quote and None otherwise; raises
        LexException on an unknown escape character.
        """
        if self.escaped:
            self.escaped = False
            replacement = self._ESCAPES.get(char)
            if replacement is None:
                raise LexException("Unknown modifier: `%s'" % (char))
            self.nodes.append(replacement)
            return

        if char == "\"":
            return Idle

        if char == "\\":
            # The next character is interpreted as an escape.
            self.escaped = True
            return

        self.nodes.append(char)
class Atom(State):
    """
    State that accumulates the text of a bare atom and resolves it with
    ``_resolve_atom`` on exit.
    """

    def __init__(self, machine):
        super(Atom, self).__init__(machine)
        self.initial_buf = ''

    def enter(self):
        # Start from whatever text the atom was seeded with.
        self.buf = self.initial_buf

    def exit(self):
        self.result = _resolve_atom(self.buf)

    def process(self, char):
        """
        Consume one character.

        A semicolon switches to Comment, whitespace switches to Idle, and
        any other character is appended to the buffer.
        """
        if char == ";":
            return Comment

        if char in WHITESPACE:
            return Idle

        self.buf += char
        return None
def AtomStartingWith(initial_char):
    """
    Return a factory that builds Atom states seeded with ``initial_char``.

    The factory takes the machine as its only argument, so it can be used
    wherever a state class is expected.
    """
    def AtomFactory(machine):
        atom = Atom(machine)
        atom.initial_buf = initial_char
        return atom

    return AtomFactory
class Idle(State):
    """
    Idle state: the state the lexer starts in and is expected to finish in.
    """

    def process(self, char):
        """
        Pick the next state from the character seen.

        Whitespace keeps the machine idle (None is returned). An opening
        parenthesis, bracket, brace, double quote, semicolon or hash selects
        Expression, List, Dict, String, Comment or Hash respectively; any
        other character starts an atom seeded with that character.
        """
        if char in WHITESPACE:
            return None

        # Built per call: Comment and Hash are defined after this class.
        transitions = {
            "(": Expression,
            "[": List,
            "{": Dict,
            "\"": String,
            ";": Comment,
            "#": Hash,
        }
        target = transitions.get(char)
        if target is None:
            return AtomStartingWith(char)
        return target
class Comment(State):
    """
    Comment state: discards characters until the end of the line.
    """

    def process(self, char):
        """
        Return Idle on a newline; every other character is ignored.
        """
        return Idle if char == "\n" else None
class Hash(State):
    """
    Hash state, entered from Idle after a ``#`` character.
    """

    def process(self, char):
        """
        Accept only ``!`` (switching to Comment); anything else raises
        LexException.
        """
        if char != "!":
            raise LexException("Unknown char (Hash state): `%s'" % (char))
        return Comment

View File

@ -27,9 +27,7 @@ from hy.models.symbol import HySymbol
from hy.models.string import HyString from hy.models.string import HyString
from hy.models.dict import HyDict from hy.models.dict import HyDict
from hy.lex.states import LexException from hy.lex import LexException, PrematureEndOfInput, tokenize
from hy.lex import tokenize
def test_lex_exception(): def test_lex_exception():
@ -37,13 +35,17 @@ def test_lex_exception():
try: try:
tokenize("(foo") tokenize("(foo")
assert True is False assert True is False
except LexException: except PrematureEndOfInput:
pass pass
try: try:
tokenize("&foo&") tokenize("{foo bar")
assert True is False assert True is False
except LexException: except PrematureEndOfInput:
pass
try:
tokenize("(defn foo [bar]")
assert True is False
except PrematureEndOfInput:
pass pass
@ -124,6 +126,8 @@ def test_lex_expression_complex():
assert objs == [HyExpression([HySymbol("foo"), HyComplex(-0.5j)])] assert objs == [HyExpression([HySymbol("foo"), HyComplex(-0.5j)])]
objs = tokenize("(foo 1.e7j)") objs = tokenize("(foo 1.e7j)")
assert objs == [HyExpression([HySymbol("foo"), HyComplex(1.e7j)])] assert objs == [HyExpression([HySymbol("foo"), HyComplex(1.e7j)])]
objs = tokenize("(foo j)")
assert objs == [HyExpression([HySymbol("foo"), HySymbol("j")])]
def test_lex_line_counting(): def test_lex_line_counting():
@ -222,11 +226,17 @@ def test_escapes():
entry = tokenize("(foo \"foo\\n\")")[0] entry = tokenize("(foo \"foo\\n\")")[0]
assert entry[1] == "foo\n" assert entry[1] == "foo\n"
try:
entry = tokenize("(foo \"foo\s\")")[0] entry = tokenize("(foo \"foo\s\")")[0]
assert True is False assert entry[1] == "foo\\s"
except LexException:
pass
def test_unicode_escapes():
"""Ensure unicode escapes are handled correctly"""
s = r'"a\xac\u1234\u20ac\U00008000"'
assert len(s) == 29
entry = tokenize(s)[0]
assert len(entry) == 5
assert [ord(x) for x in entry] == [97, 172, 4660, 8364, 32768]
def test_hashbang(): def test_hashbang():

View File

@ -63,7 +63,7 @@ def test_bin_hy_cmd():
ret = run_cmd("hy -c \"(koan\"") ret = run_cmd("hy -c \"(koan\"")
assert ret[0] == 1 assert ret[0] == 1
assert "LexException" in ret[1] assert "PrematureEndOfInput" in ret[1]
def test_bin_hy_icmd(): def test_bin_hy_icmd():