From a44e53f4de38b02726b8348505a112999d3b8411 Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont Date: Thu, 2 Jan 2014 03:09:18 +0100 Subject: [PATCH 1/8] Comments end when the input ends or a newline occurs This fixes #382, which occured because the REPL doesn't use trailing newlines. --- hy/lex/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hy/lex/lexer.py b/hy/lex/lexer.py index d4a5638..8032fc0 100644 --- a/hy/lex/lexer.py +++ b/hy/lex/lexer.py @@ -60,7 +60,7 @@ lg.add('STRING', r'''(?x) lg.add('IDENTIFIER', r'[^()\[\]{}\'"\s;]+') -lg.ignore(r';.*[\r\n]+') +lg.ignore(r';.*(?=\r|\n|$)') lg.ignore(r'\s+') From c1d5948d73280e61e48cf42f9aed944e87392aa3 Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont Date: Thu, 2 Jan 2014 03:13:49 +0100 Subject: [PATCH 2/8] Add regression test for #382 --- tests/lex/test_lex.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/lex/test_lex.py b/tests/lex/test_lex.py index bda1e3f..af9f286 100644 --- a/tests/lex/test_lex.py +++ b/tests/lex/test_lex.py @@ -260,3 +260,9 @@ def test_reader_macro(): assert entry[0][0] == HySymbol("dispatch_reader_macro") assert entry[0][1] == HyExpression([HySymbol("quote"), HyString("^")]) assert len(entry[0]) == 3 + + +def test_lex_comment_382(): + """Ensure that we can tokenize sources with a comment at the end""" + entry = tokenize("foo ;bar\n;baz") + assert entry == [HySymbol("foo")] From 715158c7dbddd222a5bc3e80266dd648ce47e0d5 Mon Sep 17 00:00:00 2001 From: Bob Tolbert Date: Tue, 31 Dec 2013 15:40:58 -0700 Subject: [PATCH 3/8] Add a "clean" target to make and make.bat --- Makefile | 9 +++++++++ make.bat | 24 +++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index b8815e2..21f2b21 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ all: @echo " - tox" @echo " - d" @echo " - r" + @echo " - clean" @echo "" docs: @@ -60,4 +61,12 @@ else flake8 hy bin tests endif +clean: + @find . 
-name "*.pyc" -exec rm {} \; + @find -name __pycache__ -delete + @${RM} -r -f .tox + @${RM} -r -f dist + @${RM} -r -f *.egg-info + @${RM} -r -f docs/_build + .PHONY: docs diff --git a/make.bat b/make.bat index ccf6138..7046e96 100644 --- a/make.bat +++ b/make.bat @@ -20,8 +20,9 @@ if "%1" == "help" ( echo. - tox echo. - d echo. - r + echo. - clean echo. - goto end + goto :EOF ) if "%1" == "docs" ( @@ -109,8 +110,25 @@ if "%1" == "r" ( goto :EOF ) -if "%1" == full ( +if "%1" == "full" ( call :docs call :d call :tox -) \ No newline at end of file +goto :EOF +) + +if "%1" == "clean" ( +:clean + if EXIST hy\*.pyc cmd /C del /S /Q hy\*.pyc + if EXIST tests\*pyc cmd /C del /S /Q tests\*pyc + for /r %%R in (__pycache__) do if EXIST %%R (rmdir /S /Q %%R) + if EXIST .tox\NUL cmd /C rmdir /S /Q .tox + if EXIST dist\NUL cmd /C rmdir /S /Q dist + if EXIST hy.egg-info\NUL cmd /C rmdir /S /Q hy.egg-info + if EXIST docs\_build\NUL cmd /C rmdir /S /Q docs\_build + goto :EOF +) + +echo.Error: '%1' - unknown target +echo. 
+goto :help From 9a128edcb23294f6bb6044dab6431a0253ab9447 Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont Date: Fri, 3 Jan 2014 17:17:51 +0100 Subject: [PATCH 4/8] Pretty print the AST in hy2py --- bin/hy2py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/hy2py b/bin/hy2py index 125fd26..9a1199e 100755 --- a/bin/hy2py +++ b/bin/hy2py @@ -6,7 +6,6 @@ from hy.importer import (import_file_to_ast, import_file_to_module, import astor.codegen import sys -import ast module_name = "" @@ -17,7 +16,7 @@ print("") _ast = import_file_to_ast(sys.argv[1], module_name) print("") print("") -print(ast.dump(_ast)) +print(astor.dump(_ast)) print("") print("") print(astor.codegen.to_source(_ast)) From 9e02eaca2663eb7c428fe55e73443985ec44a3b0 Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont Date: Thu, 2 Jan 2014 15:50:42 +0100 Subject: [PATCH 5/8] Whitespace fix --- docs/language/internals.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/language/internals.rst b/docs/language/internals.rst index b9513f6..f6d1c3c 100644 --- a/docs/language/internals.rst +++ b/docs/language/internals.rst @@ -251,7 +251,7 @@ so our re-written ``nif`` would look like: [(zero? ~g) ~zero-form] [(neg? ~g) ~neg-form])))) -Finally, though we can make a new macro that does all this for us. :ref:`defmacro/g!` +Finally, though we can make a new macro that does all this for us. :ref:`defmacro/g!` will take all symbols that begin with ``g!`` and automatically call ``gensym`` with the remainder of the symbol. So ``g!a`` would become ``(gensym "a")``. 
From 72a300c6e3980478fc45e3cce022d93f49cb063c Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont Date: Thu, 2 Jan 2014 16:27:32 +0100 Subject: [PATCH 6/8] Add some documentation about models --- docs/language/internals.rst | 170 ++++++++++++++++++++++++++++++++++-- 1 file changed, 164 insertions(+), 6 deletions(-) diff --git a/docs/language/internals.rst b/docs/language/internals.rst index f6d1c3c..05bd4f1 100644 --- a/docs/language/internals.rst +++ b/docs/language/internals.rst @@ -5,12 +5,172 @@ Internal Hy Documentation .. note:: These bits are for folks who hack on Hy itself, mostly! +.. _models: Hy Models ========= -.. todo:: - Write this. +Introduction to Hy models +------------------------- + +Hy models are a very thin layer on top of regular Python objects, +representing Hy source code as data. Models only add source position +information, and a handful of methods to support clean manipulation of +Hy source code, for instance in macros. To achieve that goal, Hy models +are mixins of a base Python class and :ref:`HyObject`. + +.. _hyobject: + +HyObject +~~~~~~~~ + +``hy.models.HyObject`` is the base class of Hy models. It only +implements one method, ``replace``, which replaces the source position +of the current object with the one passed as argument. This allows us to +keep track of the original position of expressions that get modified by +macros, be that in the compiler or in pure hy macros. + +``HyObject`` is not intended to be used directly to instantiate Hy +models, but only as a mixin for other classes. + +Compound models +--------------- + +Parenthesized and bracketed lists are parsed as compound models by the +Hy parser. + +.. _hylist: + +HyList +~~~~~~ + +``hy.models.list.HyList`` is the base class of "iterable" Hy models. Its +basic use is to represent bracketed ``[]`` lists, which, when used as a +top-level expression, translate to Python list literals in the +compilation phase. 
+ +Adding a HyList to another iterable object reuses the class of the +left-hand-side object, a useful behavior when you want to concatenate Hy +objects in a macro, for instance. + +.. _hyexpression: + +HyExpression +~~~~~~~~~~~~ + +``hy.models.expression.HyExpression`` inherits :ref:`HyList` for +parenthesized ``()`` expressions. The compilation result of those +expressions depends on the first element of the list: the compiler +dispatches expressions between compiler special-forms, user-defined +macros, and regular Python function calls. + +.. _hydict: + +HyDict +~~~~~~ + +``hy.models.dict.HyDict`` inherits :ref:`HyList` for curly-bracketed ``{}`` +expressions, which compile down to a Python dictionary literal. + +The decision of using a list instead of a dict as the base class for +``HyDict`` allows easier manipulation of dicts in macros, with the added +benefit of allowing compound expressions as dict keys (as, for instance, +the :ref:`HyExpression` Python class isn't hashable). + +Atomic models +------------- + +In the input stream, double-quoted strings, respecting the Python +notation for strings, are parsed as a single token, which is directly +parsed as a :ref:`HyString`. + +An uninterrupted string of characters, excluding spaces, brackets, +quotes, double-quotes and comments, is parsed as an identifier. + +Identifiers are resolved to atomic models during the parsing phase in +the following order: + + - :ref:`HyInteger <hy_numeric_models>` + - :ref:`HyFloat <hy_numeric_models>` + - :ref:`HyComplex <hy_numeric_models>` (if the atom isn't a bare ``j``) + - :ref:`HyKeyword` (if the atom starts with ``:``) + - :ref:`HyLambdaListKeyword` (if the atom starts with ``&``) + - :ref:`HySymbol` + +.. _hystring: + +HyString +~~~~~~~~ + +``hy.models.string.HyString`` is the base class of string-equivalent Hy +models. It also represents double-quoted string literals, ``""``, which +compile down to unicode string literals in Python. 
``HyStrings`` inherit +unicode objects in Python 2, and string objects in Python 3 (and are +therefore not encoding-dependent). + +``HyString`` based models are immutable. + +Hy literal strings can span multiple lines, and are considered by the +parser as a single unit, respecting the Python escapes for unicode +strings. + +.. _hy_numeric_models: + +Numeric models +~~~~~~~~~~~~~~ + +``hy.models.integer.HyInteger`` represents integer literals (using the +``long`` type on Python 2, and ``int`` on Python 3). + +``hy.models.float.HyFloat`` represents floating-point literals. + +``hy.models.complex.HyComplex`` represents complex literals. + +Numeric models are parsed using the corresponding Python routine, and +valid numeric Python literals will be turned into their Hy counterpart. + +.. _hysymbol: + +HySymbol +~~~~~~~~ + +``hy.models.symbol.HySymbol`` is the model used to represent symbols +in the Hy language. It inherits :ref:`HyString`. + +``HySymbol`` objects are mangled in the parsing phase, to help Python +interoperability: + + - Symbols surrounded by asterisks (``*``) are turned into uppercase; + - Dashes (``-``) are turned into underscores (``_``); + - One trailing question mark (``?``) is turned into a leading ``is_``. + +Caveat: as the mangling is done during the parsing phase, it is possible +to programmatically generate HySymbols that can't be generated with Hy +source code. Such a mechanism is used by :ref:`gensym` to generate +"uninterned" symbols. + +.. _hykeyword: + +HyKeyword +~~~~~~~~~ + +``hy.models.keyword.HyKeyword`` represents keywords in Hy. Keywords are +symbols starting with a ``:``. The class inherits :ref:`HyString`. + +To distinguish :ref:`HyKeywords <hykeyword>` from :ref:`HySymbols +<hysymbol>`, without the possibility of (involuntary) clashes, the +private-use unicode character ``"\uFDD0"`` is prepended to the keyword +literal before storage. + +.. 
_hylambdalistkeyword: + +HyLambdaListKeyword +~~~~~~~~~~~~~~~~~~~ + +``hy.models.lambdalist.HyLambdaListKeyword`` represents lambda-list +keywords, that is, keywords used by the language definition inside +function signatures. Lambda-list keywords are symbols starting with a +``&``. The class inherits :ref:`HyString`. Hy Internal Theory @@ -50,11 +210,9 @@ the tokens generated, and return the Hy models. You can think of the Hy models as the "AST" for Hy, it's what Macros operate on (directly), and it's what the compiler uses when it compiles Hy down. -Check the documentation for more information on the Hy models for more -information regarding the Hy models, and what they mean. - -.. TODO: Uh, we should, like, document models. +.. seealso:: + Section :ref:`models` for more information on Hy models and what they mean. .. _compiling: From 1d5847823bc2202f6750adc292a0bf10b2b87abd Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont Date: Thu, 2 Jan 2014 16:28:03 +0100 Subject: [PATCH 7/8] more precise disclaimer --- docs/language/internals.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/language/internals.rst b/docs/language/internals.rst index 05bd4f1..33511b5 100644 --- a/docs/language/internals.rst +++ b/docs/language/internals.rst @@ -2,8 +2,8 @@ Internal Hy Documentation ========================= -.. note:: - These bits are for folks who hack on Hy itself, mostly! +.. note:: These bits are mostly useful for folks who hack on Hy itself, + but can also be used by those delving deeper into macro programming. .. 
_models: From a1895f635c6a17e0a4476af580338aee60fac519 Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont Date: Thu, 2 Jan 2014 18:09:33 +0100 Subject: [PATCH 8/8] reword the four steps a bit --- docs/language/internals.rst | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/language/internals.rst b/docs/language/internals.rst index 33511b5..a8de44e 100644 --- a/docs/language/internals.rst +++ b/docs/language/internals.rst @@ -181,13 +181,14 @@ Hy Internal Theory Overview -------- -The Hy internals work by acting as a front-end to Python bytecode, so that -Hy it's self compiles down to Python Bytecode, allowing an unmodified Python -runtime to run Hy. +The Hy internals work by acting as a front-end to Python bytecode, so +that Hy itself compiles down to Python bytecode, allowing an unmodified +Python runtime to run Hy code, without even noticing it. -The way we do this is by translating Hy into Python AST, and building that AST -down into Python bytecode using standard internals, so that we don't have -to duplicate all the work of the Python internals for every single Python +The way we do this is by translating Hy into an internal Python AST +data structure, and building that AST down into Python bytecode using +modules from the Python standard library, so that we don't have to +duplicate all the work of the Python internals for every single Python release. Hy works in four stages. The following sections will cover each step of Hy @@ -195,8 +196,8 @@ from source to runtime. .. _lexing: -Lexing / tokenizing -------------------- +Steps 1 and 2: Tokenizing and parsing +------------------------------------- The first stage of compiling hy is to lex the source into tokens that we can deal with. We use a project called rply, which is a really nice (and fast) @@ -216,8 +217,8 @@ on (directly), and it's what the compiler uses when it compiles Hy down. .. 
_compiling: -Compiling ---------- +Step 3: Hy compilation to Python AST +------------------------------------ This is where most of the magic in Hy happens. This is where we take Hy AST (the models), and compile them into Python AST. A couple of funky things happen @@ -329,8 +330,8 @@ into:: By forcing things into an ``ast.expr`` if we can, but the general idea holds. -Runtime -------- +Step 4: Python bytecode output and runtime +------------------------------------------ After we have a Python AST tree that's complete, we can try and compile it to Python bytecode by pushing it through ``eval``. From here on out, we're no