From d501d4d8064847c30e56b766334365e49fc9edcb Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 25 Feb 2018 16:44:20 -0800 Subject: [PATCH 01/14] Mangle symbols at compile-time instead of parse-time This means that a HySymbol remembers its original name. That is, `a-b` and `a_b` are different symbols although `(setv a-b 1)` and `(setv a_b 1)` set the same variable (namely, `a_b`). Most of the edits in this commit are to switch underscores to hyphens in places where mangling hasn't happened yet. I removed some lexer tests since the lexer no longer does any mangling. --- hy/compiler.py | 64 ++++++++++++++++----------------- hy/contrib/hy_repr.hy | 6 ++-- hy/core/language.hy | 2 +- hy/lex/parser.py | 10 +++--- hy/models.py | 2 +- tests/native_tests/operators.hy | 6 ++-- tests/test_lex.py | 62 +++----------------------------- 7 files changed, 49 insertions(+), 103 deletions(-) diff --git a/hy/compiler.py b/hy/compiler.py index 1132b65..43bb2ac 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -53,7 +53,7 @@ def load_stdlib(): import hy.core for module in hy.core.STDLIB: mod = importlib.import_module(module) - for e in mod.EXPORTS: + for e in map(ast_str, mod.EXPORTS): if getattr(mod, e) is not getattr(builtins, e, ''): # Don't bother putting a name in _stdlib if it # points to a builtin with the same name. 
This @@ -83,16 +83,16 @@ if PY35: def ast_str(foobar): if PY3: - return str(foobar) + return hy_symbol_mangle(str(foobar)) try: - return str(foobar) + return str(hy_symbol_mangle(str(foobar))) except UnicodeEncodeError: pass enc = codecs.getencoder('punycode') foobar, _ = enc(foobar) - return "hy_%s" % (str(foobar).replace("-", "_")) + return "hy_%s" % str(hy_symbol_mangle(foobar)) def builds(*types, **kwargs): @@ -379,7 +379,7 @@ def is_unpack(kind, x): return (isinstance(x, HyExpression) and len(x) > 0 and isinstance(x[0], HySymbol) - and x[0] == "unpack_" + kind) + and x[0] == "unpack-" + kind) def ends_with_else(expr): @@ -699,17 +699,17 @@ class HyASTCompiler(object): """ if level == 0: if isinstance(form, HyExpression): - if form and form[0] in ("unquote", "unquote_splice"): + if form and form[0] in ("unquote", "unquote-splice"): if len(form) != 2: raise HyTypeError(form, ("`%s' needs 1 argument, got %s" % form[0], len(form) - 1)) - return set(), form[1], (form[0] == "unquote_splice") + return set(), form[1], (form[0] == "unquote-splice") if isinstance(form, HyExpression): if form and form[0] == "quasiquote": level += 1 - if form and form[0] in ("unquote", "unquote_splice"): + if form and form[0] in ("unquote", "unquote-splice"): level -= 1 name = form.__class__.__name__ @@ -783,12 +783,12 @@ class HyASTCompiler(object): ret.add_imports("hy", imports) return ret - @builds("unquote", "unquote_splicing") + @builds("unquote", "unquote-splicing") def compile_unquote(self, expr): raise HyTypeError(expr, "`%s' can't be used at the top-level" % expr[0]) - @builds("unpack_iterable") + @builds("unpack-iterable") @checkargs(exact=1) def compile_unpack_iterable(self, expr): if not PY3: @@ -797,7 +797,7 @@ class HyASTCompiler(object): ret += asty.Starred(expr, value=ret.force_expr, ctx=ast.Load()) return ret - @builds("unpack_mapping") + @builds("unpack-mapping") @checkargs(exact=1) def compile_unpack_mapping(self, expr): raise HyTypeError(expr, "`unpack-mapping` isn't 
allowed here") @@ -1143,12 +1143,12 @@ class HyASTCompiler(object): ret += self.compile(expr[1]) return ret + asty.Yield(expr, value=ret.force_expr) - @builds("yield_from", iff=PY3) + @builds("yield-from", iff=PY3) @builds("await", iff=PY35) @checkargs(1) def compile_yield_from_or_await_expression(self, expr): ret = Result() + self.compile(expr[1]) - node = asty.YieldFrom if expr[0] == "yield_from" else asty.Await + node = asty.YieldFrom if expr[0] == "yield-from" else asty.Await return ret + node(expr, value=ret.force_expr) @builds("import") @@ -1307,7 +1307,7 @@ class HyASTCompiler(object): slice=ast.Slice(lower=nodes[1], upper=nodes[2], step=nodes[3]), ctx=ast.Load()) - @builds("with_decorator") + @builds("with-decorator") @checkargs(min=1) def compile_decorate_expression(self, expr): expr.pop(0) # with-decorator @@ -1403,7 +1403,7 @@ class HyASTCompiler(object): return gen_res + cond, gen - @builds("list_comp", "set_comp", "genexpr") + @builds("list-comp", "set-comp", "genexpr") @checkargs(min=2, max=3) def compile_comprehension(self, expr): # (list-comp expr (target iter) cond?) 
@@ -1421,13 +1421,13 @@ class HyASTCompiler(object): ret = self.compile(expression) node_class = ( - asty.ListComp if form == "list_comp" else - asty.SetComp if form == "set_comp" else + asty.ListComp if form == "list-comp" else + asty.SetComp if form == "set-comp" else asty.GeneratorExp) return ret + gen_res + node_class( expr, elt=ret.force_expr, generators=gen) - @builds("dict_comp") + @builds("dict-comp") @checkargs(min=3, max=4) def compile_dict_comprehension(self, expr): expr.pop(0) # dict-comp @@ -1558,8 +1558,8 @@ class HyASTCompiler(object): ops = {"=": ast.Eq, "!=": ast.NotEq, "<": ast.Lt, "<=": ast.LtE, ">": ast.Gt, ">=": ast.GtE, - "is": ast.Is, "is_not": ast.IsNot, - "in": ast.In, "not_in": ast.NotIn} + "is": ast.Is, "is-not": ast.IsNot, + "in": ast.In, "not-in": ast.NotIn} inv = expression.pop(0) ops = [ops[inv]() for _ in range(len(expression) - 1)] @@ -1578,12 +1578,12 @@ class HyASTCompiler(object): asty.Name(expression, id="True", ctx=ast.Load())) return self._compile_compare_op_expression(expression) - @builds("!=", "is_not") + @builds("!=", "is-not") @checkargs(min=2) def compile_compare_op_expression_coll(self, expression): return self._compile_compare_op_expression(expression) - @builds("in", "not_in") + @builds("in", "not-in") @checkargs(2) def compile_compare_op_expression_binary(self, expression): return self._compile_compare_op_expression(expression) @@ -1680,7 +1680,7 @@ class HyASTCompiler(object): def compile_maths_expression_sub(self, expression): return self._compile_maths_expression_additive(expression) - @builds("+=", "/=", "//=", "*=", "_=", "%=", "**=", "<<=", ">>=", "|=", + @builds("+=", "/=", "//=", "*=", "-=", "%=", "**=", "<<=", ">>=", "|=", "^=", "&=") @builds("@=", iff=PY35) @checkargs(2) @@ -1689,7 +1689,7 @@ class HyASTCompiler(object): "/=": ast.Div, "//=": ast.FloorDiv, "*=": ast.Mult, - "_=": ast.Sub, + "-=": ast.Sub, "%=": ast.Mod, "**=": ast.Pow, "<<=": ast.LShift, @@ -1732,7 +1732,7 @@ class HyASTCompiler(object): if 
isinstance(fn, HySymbol): # First check if `fn` is a special form, unless it has an - # `unpack_iterable` in it, since Python's operators (`+`, + # `unpack-iterable` in it, since Python's operators (`+`, # etc.) can't unpack. An exception to this exception is that # tuple literals (`,`) can unpack. if fn == "," or not ( @@ -1785,7 +1785,7 @@ class HyASTCompiler(object): # An exception for pulling together keyword args is if we're doing # a typecheck, eg (type :foo) with_kwargs = fn not in ( - "type", "HyKeyword", "keyword", "name", "is_keyword") + "type", "HyKeyword", "keyword", "name", "keyword?") args, ret, keywords, oldpy_star, oldpy_kw = self._compile_collect( expression[1:], with_kwargs, oldpy_unpack=True) @@ -2057,7 +2057,7 @@ class HyASTCompiler(object): pairs = expr[1:] while len(pairs) > 0: k, v = (pairs.pop(0), pairs.pop(0)) - if k == HySymbol("__init__"): + if ast_str(k) == "__init__": v.append(HySymbol("None")) new_args.append(k) new_args.append(v) @@ -2120,7 +2120,7 @@ class HyASTCompiler(object): bases=bases_expr, body=body.stmts) - @builds("dispatch_tag_macro") + @builds("dispatch-tag-macro") @checkargs(exact=2) def compile_dispatch_tag_macro(self, expression): expression.pop(0) # dispatch-tag-macro @@ -2135,14 +2135,14 @@ class HyASTCompiler(object): expr = tag_macroexpand(tag, expression.pop(0), self) return self.compile(expr) - @builds("eval_and_compile", "eval_when_compile") + @builds("eval-and-compile", "eval-when-compile") def compile_eval_and_compile(self, expression, building): expression[0] = HySymbol("do") hy.importer.hy_eval(expression, compile_time_ns(self.module_name), self.module_name) return (self._compile_branch(expression[1:]) - if building == "eval_and_compile" + if building == "eval-and-compile" else Result()) @builds(HyCons) @@ -2198,8 +2198,8 @@ class HyASTCompiler(object): attr=ast_str(local), ctx=ast.Load()) - if symbol in _stdlib: - self.imports[_stdlib[symbol]].add(symbol) + if ast_str(symbol) in _stdlib: + 
self.imports[_stdlib[ast_str(symbol)]].add(symbol) return asty.Name(symbol, id=ast_str(symbol), ctx=ast.Load()) diff --git a/hy/contrib/hy_repr.hy b/hy/contrib/hy_repr.hy index 627649a..ce2e83b 100644 --- a/hy/contrib/hy_repr.hy +++ b/hy/contrib/hy_repr.hy @@ -75,9 +75,9 @@ 'quote "'" 'quasiquote "`" 'unquote "~" - 'unquote_splice "~@" - 'unpack_iterable "#* " - 'unpack_mapping "#** "}) + 'unquote-splice "~@" + 'unpack-iterable "#* " + 'unpack-mapping "#** "}) (if (and x (symbol? (first x)) (in (first x) syntax)) (+ (get syntax (first x)) (hy-repr (second x))) (+ "(" (-cat x) ")")))) diff --git a/hy/core/language.hy b/hy/core/language.hy index cda8d0a..ad9835c 100644 --- a/hy/core/language.hy +++ b/hy/core/language.hy @@ -87,7 +87,7 @@ If the second argument `codegen` is true, generate python code instead." "Return a generator from the original collection `coll` with no duplicates." (setv seen (set) citer (iter coll)) (for* [val citer] - (if (not_in val seen) + (if (not-in val seen) (do (yield val) (.add seen val))))) diff --git a/hy/lex/parser.py b/hy/lex/parser.py index c5e3c43..e02ec2f 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -201,7 +201,7 @@ def term_unquote(p): @pg.production("term : UNQUOTESPLICE term") @set_quote_boundaries def term_unquote_splice(p): - return HyExpression([HySymbol("unquote_splice"), p[1]]) + return HyExpression([HySymbol("unquote-splice"), p[1]]) @pg.production("term : HASHSTARS term") @@ -209,9 +209,9 @@ def term_unquote_splice(p): def term_hashstars(p): n_stars = len(p[0].getstr()[1:]) if n_stars == 1: - sym = "unpack_iterable" + sym = "unpack-iterable" elif n_stars == 2: - sym = "unpack_mapping" + sym = "unpack-mapping" else: raise LexException( "Too many stars in `#*` construct (if you want to unpack a symbol " @@ -227,7 +227,7 @@ def hash_other(p): st = p[0].getstr()[1:] str_object = HyString(st) expr = p[1] - return HyExpression([HySymbol("dispatch_tag_macro"), str_object, expr]) + return 
HyExpression([HySymbol("dispatch-tag-macro"), str_object, expr]) @pg.production("set : HLCURLY list_contents RCURLY") @@ -307,7 +307,7 @@ def t_identifier(p): '`(. )` or `(. )`)', p[0].source_pos.lineno, p[0].source_pos.colno) - return HySymbol(".".join(hy_symbol_mangle(x) for x in obj.split("."))) + return HySymbol(obj) def symbol_like(obj): diff --git a/hy/models.py b/hy/models.py index f071ba2..35ff55a 100644 --- a/hy/models.py +++ b/hy/models.py @@ -338,7 +338,7 @@ class HyCons(HyObject): # Keep unquotes in the cdr of conses if type(cdr) == HyExpression: if len(cdr) > 0 and type(cdr[0]) == HySymbol: - if cdr[0] in ("unquote", "unquote_splice"): + if cdr[0] in ("unquote", "unquote-splice"): return super(HyCons, cls).__new__(cls) return cdr.__class__([wrap_value(car)] + cdr) diff --git a/tests/native_tests/operators.hy b/tests/native_tests/operators.hy index a32d5ab..990cb33 100644 --- a/tests/native_tests/operators.hy +++ b/tests/native_tests/operators.hy @@ -247,9 +247,9 @@ (forbid (f)) (forbid (f "hello")) (defclass C) - (setv x (get {"is_not" (C) "!=" 0} f-name)) - (setv y (get {"is_not" (C) "!=" 1} f-name)) - (setv z (get {"is_not" (C) "!=" 2} f-name)) + (setv x (get {"is-not" (C) "!=" 0} f-name)) + (setv y (get {"is-not" (C) "!=" 1} f-name)) + (setv z (get {"is-not" (C) "!=" 2} f-name)) (assert (is (f x x) False)) (assert (is (f y y) False)) (assert (is (f x y) True)) diff --git a/tests/test_lex.py b/tests/test_lex.py index 8c236c5..776c286 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -121,8 +121,8 @@ def test_lex_nan_and_inf(): assert tokenize("INF") == [HySymbol("INF")] assert tokenize("-Inf") == [HyFloat(float("-inf"))] - assert tokenize("-inf") == [HySymbol("_inf")] - assert tokenize("-INF") == [HySymbol("_INF")] + assert tokenize("-inf") == [HySymbol("-inf")] + assert tokenize("-INF") == [HySymbol("-INF")] def test_lex_expression_complex(): @@ -140,7 +140,7 @@ def test_lex_expression_complex(): assert t("nanj") == f(HySymbol("nanj")) 
assert t("Inf+Infj") == f(HyComplex(complex(float("inf"), float("inf")))) assert t("Inf-Infj") == f(HyComplex(complex(float("inf"), float("-inf")))) - assert t("Inf-INFj") == f(HySymbol("Inf_INFj")) + assert t("Inf-INFj") == f(HySymbol("Inf-INFj")) def test_lex_digit_separators(): @@ -332,7 +332,7 @@ def test_complex(): def test_tag_macro(): """Ensure tag macros are handled properly""" entry = tokenize("#^()") - assert entry[0][0] == HySymbol("dispatch_tag_macro") + assert entry[0][0] == HySymbol("dispatch-tag-macro") assert entry[0][1] == HyString("^") assert len(entry[0]) == 3 @@ -343,60 +343,6 @@ def test_lex_comment_382(): assert entry == [HySymbol("foo")] -def test_lex_mangling_star(): - """Ensure that mangling starred identifiers works according to plan""" - entry = tokenize("*foo*") - assert entry == [HySymbol("FOO")] - entry = tokenize("*") - assert entry == [HySymbol("*")] - entry = tokenize("*foo") - assert entry == [HySymbol("*foo")] - - -def test_lex_mangling_hyphen(): - """Ensure that hyphens get translated to underscores during mangling""" - entry = tokenize("foo-bar") - assert entry == [HySymbol("foo_bar")] - entry = tokenize("-") - assert entry == [HySymbol("-")] - - -def test_lex_mangling_qmark(): - """Ensure that identifiers ending with a question mark get mangled ok""" - entry = tokenize("foo?") - assert entry == [HySymbol("is_foo")] - entry = tokenize("?") - assert entry == [HySymbol("?")] - entry = tokenize("im?foo") - assert entry == [HySymbol("im?foo")] - entry = tokenize(".foo?") - assert entry == [HySymbol(".is_foo")] - entry = tokenize("foo.bar?") - assert entry == [HySymbol("foo.is_bar")] - entry = tokenize("foo?.bar") - assert entry == [HySymbol("is_foo.bar")] - entry = tokenize(".foo?.bar.baz?") - assert entry == [HySymbol(".is_foo.bar.is_baz")] - - -def test_lex_mangling_bang(): - """Ensure that identifiers ending with a bang get mangled ok""" - entry = tokenize("foo!") - assert entry == [HySymbol("foo_bang")] - entry = tokenize("!") - 
assert entry == [HySymbol("!")] - entry = tokenize("im!foo") - assert entry == [HySymbol("im!foo")] - entry = tokenize(".foo!") - assert entry == [HySymbol(".foo_bang")] - entry = tokenize("foo.bar!") - assert entry == [HySymbol("foo.bar_bang")] - entry = tokenize("foo!.bar") - assert entry == [HySymbol("foo_bang.bar")] - entry = tokenize(".foo!.bar.baz!") - assert entry == [HySymbol(".foo_bang.bar.baz_bang")] - - def test_unmangle(): import sys f = sys.modules["hy.lex.parser"].hy_symbol_unmangle From 52edad28e2aca3ca0e47c4cb8ba462141792e74a Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Tue, 27 Feb 2018 11:53:23 -0800 Subject: [PATCH 02/14] Overhaul mangling rules --- hy/_compat.py | 14 +++ hy/cmdline.py | 6 +- hy/compiler.py | 34 +++----- hy/core/language.hy | 2 +- hy/core/shadow.hy | 4 +- hy/extra/reserved.hy | 4 +- hy/lex/parser.py | 74 +++++++++------- hy/macros.py | 10 ++- tests/native_tests/language.hy | 30 +------ tests/native_tests/mangling.hy | 127 ++++++++++++++++++++++++++++ tests/native_tests/native_macros.hy | 11 +-- tests/test_lex.py | 18 ---- 12 files changed, 221 insertions(+), 113 deletions(-) create mode 100644 tests/native_tests/mangling.hy diff --git a/hy/_compat.py b/hy/_compat.py index a22bb13..fa3b8eb 100644 --- a/hy/_compat.py +++ b/hy/_compat.py @@ -35,3 +35,17 @@ if PY3: else: def raise_empty(t, *args): raise t(*args) + +def isidentifier(x): + if PY3: + return x.isidentifier() + else: + if x.rstrip() != x: + return False + import tokenize as T + from StringIO import StringIO + try: + tokens = list(T.generate_tokens(StringIO(x).readline)) + except T.TokenError: + return False + return len(tokens) == 2 and tokens[0][0] == T.NAME diff --git a/hy/cmdline.py b/hy/cmdline.py index a7126a9..95ec9c5 100644 --- a/hy/cmdline.py +++ b/hy/cmdline.py @@ -63,12 +63,12 @@ class HyREPL(code.InteractiveConsole): elif callable(output_fn): self.output_fn = output_fn else: - f = hy_symbol_mangle(output_fn) if "." 
in output_fn: - module, f = f.rsplit(".", 1) + parts = [hy_symbol_mangle(x) for x in output_fn.split(".")] + module, f = '.'.join(parts[:-1]), parts[-1] self.output_fn = getattr(importlib.import_module(module), f) else: - self.output_fn = __builtins__[f] + self.output_fn = __builtins__[hy_symbol_mangle(output_fn)] code.InteractiveConsole.__init__(self, locals=locals, filename=filename) diff --git a/hy/compiler.py b/hy/compiler.py index 43bb2ac..2d2bb47 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -81,18 +81,11 @@ if PY35: _decoratables += (ast.AsyncFunctionDef,) -def ast_str(foobar): - if PY3: - return hy_symbol_mangle(str(foobar)) - - try: - return str(hy_symbol_mangle(str(foobar))) - except UnicodeEncodeError: - pass - - enc = codecs.getencoder('punycode') - foobar, _ = enc(foobar) - return "hy_%s" % str(hy_symbol_mangle(foobar)) +def ast_str(x, piecewise=False): + if piecewise: + return ".".join(ast_str(s) if s else "" for s in x.split(".")) + x = hy_symbol_mangle(str_type(x)) + return x if PY3 else x.encode('UTF8') def builds(*types, **kwargs): @@ -1156,19 +1149,16 @@ class HyASTCompiler(object): expr = copy.deepcopy(expr) def _compile_import(expr, module, names=None, importer=asty.Import): if not names: - names = [ast.alias(name=ast_str(module), asname=None)] + names = [ast.alias(name=ast_str(module, piecewise=True), asname=None)] - ast_module = ast_str(module) + ast_module = ast_str(module, piecewise=True) module = ast_module.lstrip(".") level = len(ast_module) - len(module) if not module: module = None - ret = importer(expr, - module=module, - names=names, - level=level) - return Result() + ret + return Result() + importer( + expr, module=module, names=names, level=level) expr.pop(0) # index rimports = Result() @@ -1196,7 +1186,7 @@ class HyASTCompiler(object): "garbage after aliased import") iexpr.pop(0) # :as alias = iexpr.pop(0) - names = [ast.alias(name=ast_str(module), + names = [ast.alias(name=ast_str(module, piecewise=True), 
asname=ast_str(alias))] rimports += _compile_import(expr, ast_str(module), names) continue @@ -1210,7 +1200,7 @@ class HyASTCompiler(object): alias = ast_str(entry.pop(0)) else: alias = None - names.append(ast.alias(name=ast_str(sym), + names.append(ast.alias(name=(str(sym) if sym == "*" else ast_str(sym)), asname=alias)) rimports += _compile_import(expr, module, @@ -2199,7 +2189,7 @@ class HyASTCompiler(object): ctx=ast.Load()) if ast_str(symbol) in _stdlib: - self.imports[_stdlib[ast_str(symbol)]].add(symbol) + self.imports[_stdlib[ast_str(symbol)]].add(ast_str(symbol)) return asty.Name(symbol, id=ast_str(symbol), ctx=ast.Load()) diff --git a/hy/core/language.hy b/hy/core/language.hy index ad9835c..366d137 100644 --- a/hy/core/language.hy +++ b/hy/core/language.hy @@ -488,7 +488,7 @@ Even objects with the __name__ magic will work." False (or a b))) -(setv *exports* +(setv EXPORTS '[*map accumulate butlast calling-module-name chain coll? combinations comp complement compress cons cons? constantly count cycle dec distinct disassemble drop drop-last drop-while empty? eval even? every? exec first diff --git a/hy/core/shadow.hy b/hy/core/shadow.hy index 7471edb..65acb46 100644 --- a/hy/core/shadow.hy +++ b/hy/core/shadow.hy @@ -163,7 +163,7 @@ (setv coll (get coll k))) coll) -(setv *exports* [ +(setv EXPORTS [ '+ '- '* '** '/ '// '% '@ '<< '>> '& '| '^ '~ '< '> '<= '>= '= '!= @@ -171,4 +171,4 @@ 'is 'is-not 'in 'not-in 'get]) (if (not PY35) - (.remove *exports* '@)) + (.remove EXPORTS '@)) diff --git a/hy/extra/reserved.hy b/hy/extra/reserved.hy index c245224..0cd1fb5 100644 --- a/hy/extra/reserved.hy +++ b/hy/extra/reserved.hy @@ -15,8 +15,8 @@ (if (is _cache None) (do (setv unmangle (. 
sys.modules ["hy.lex.parser"] hy_symbol_unmangle)) (setv _cache (frozenset (map unmangle (+ - hy.core.language.*exports* - hy.core.shadow.*exports* + hy.core.language.EXPORTS + hy.core.shadow.EXPORTS (list (.keys (get hy.macros._hy_macros None))) keyword.kwlist (list-comp k [k (.keys hy.compiler.-compile-table)] diff --git a/hy/lex/parser.py b/hy/lex/parser.py index e02ec2f..82cbd85 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- # Copyright 2018 the authors. # This file is part of Hy, which is free software licensed under the Expat # license. See the LICENSE. @@ -5,10 +6,11 @@ from __future__ import unicode_literals from functools import wraps +import string, re, unicodedata from rply import ParserGenerator -from hy._compat import str_type +from hy._compat import PY3, str_type, isidentifier from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression, HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString, HySymbol) @@ -21,43 +23,57 @@ pg = ParserGenerator( cache_id="hy_parser" ) +mangle_delim = 'Δ' if PY3 else 'X' -def hy_symbol_mangle(p): - if p.startswith("*") and p.endswith("*") and p not in ("*", "**"): - p = p[1:-1].upper() +def hy_symbol_mangle(s): + assert s - if "-" in p and p != "-": - p = p.replace("-", "_") + s = s.replace("-", "_") + s2 = s.lstrip('_') + leading_underscores = '_' * (len(s) - len(s2)) + s = s2 - if p.endswith("?") and p != "?": - p = "is_%s" % (p[:-1]) + if s.endswith("?"): + s = 'is_' + s[:-1] + if not isidentifier(leading_underscores + s): + # Replace illegal characters with their Unicode character + # names, or hexadecimal if they don't have one. + s = 'hyx_' + ''.join( + c + if c != mangle_delim and isidentifier('S' + c) + # We prepend the "S" because some characters aren't + # allowed at the start of an identifier. 
+ else '{0}{1}{0}'.format(mangle_delim, + unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_') + or 'U{:x}'.format(ord(c))) + for c in s) - if p.endswith("!") and p != "!": - p = "%s_bang" % (p[:-1]) - - return p + s = leading_underscores + s + assert isidentifier(s) + return s -def hy_symbol_unmangle(p): - # hy_symbol_mangle is one-way, so this can't be perfect. - # But it can be useful till we have a way to get the original - # symbol (https://github.com/hylang/hy/issues/360). - p = str_type(p) +def hy_symbol_unmangle(s): + # hy_symbol_mangle is one-way, so this won't round-trip. + s = str_type(s) - if p.endswith("_bang") and p != "_bang": - p = p[:-len("_bang")] + "!" + s2 = s.lstrip('_') + leading_underscores = len(s) - len(s2) + s = s2 - if p.startswith("is_") and p != "is_": - p = p[len("is_"):] + "?" + if s.startswith('hyx_'): + s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim), + lambda mo: + chr(int(mo.group(2), base=16)) + if mo.group(1) + else unicodedata.lookup( + mo.group(2).replace('_', ' ').replace('H', '-').upper()), + s[len('hyx_'):]) + if s.startswith('is_'): + s = s[len("is_"):] + "?" + s = s.replace('_', '-') - if "_" in p and p != "_": - p = p.replace("_", "-") - - if (all([c.isalpha() and c.isupper() or c == '_' for c in p]) and - any([c.isalpha() for c in p])): - p = '*' + p.lower() + '*' - - return p + return '-' * leading_underscores + s def set_boundaries(fun): diff --git a/hy/macros.py b/hy/macros.py index 0613711..ffa5356 100644 --- a/hy/macros.py +++ b/hy/macros.py @@ -2,8 +2,11 @@ # This file is part of Hy, which is free software licensed under the Expat # license. See the LICENSE. 
+from hy._compat import PY3 import hy.inspect from hy.models import replace_hy_obj, HyExpression, HySymbol +from hy.lex.parser import hy_symbol_mangle + from hy.errors import HyTypeError, HyMacroExpansionError from collections import defaultdict @@ -62,11 +65,14 @@ def tag(name): """ def _(fn): - fn.__name__ = '#{}'.format(name) + _name = hy_symbol_mangle('#{}'.format(name)) + if not PY3: + _name = _name.encode('UTF-8') + fn.__name__ = _name module_name = fn.__module__ if module_name.startswith("hy.core"): module_name = None - _hy_tag[module_name][name] = fn + _hy_tag[module_name][hy_symbol_mangle(name)] = fn return fn return _ diff --git a/tests/native_tests/language.hy b/tests/native_tests/language.hy index ea146c9..60936d8 100644 --- a/tests/native_tests/language.hy +++ b/tests/native_tests/language.hy @@ -738,13 +738,6 @@ (assert (= x 2))) -(defn test-earmuffs [] - "NATIVE: Test earmuffs" - (setv *foo* "2") - (setv foo "3") - (assert (= *foo* FOO)) - (assert (!= *foo* foo))) - (defn test-threading [] "NATIVE: test threading macro" @@ -1112,27 +1105,6 @@ (assert (= ((fn [] (-> 2 (+ 1 1) (* 1 2)))) 8))) -(defn test-symbol-utf-8 [] - "NATIVE: test symbol encoded" - (setv ♥ "love" - ⚘ "flower") - (assert (= (+ ⚘ ♥) "flowerlove"))) - - -(defn test-symbol-dash [] - "NATIVE: test symbol encoded" - (setv ♥-♥ "doublelove" - -_- "what?") - (assert (= ♥-♥ "doublelove")) - (assert (= -_- "what?"))) - - -(defn test-symbol-question-mark [] - "NATIVE: test foo? -> is_foo behavior" - (setv foo? 
"nachos") - (assert (= is_foo "nachos"))) - - (defn test-and [] "NATIVE: test the and function" @@ -1816,4 +1788,4 @@ macros() (defn test-relative-import [] "Make sure relative imports work properly" (import [..resources [tlib]]) - (assert (= tlib.*secret-message* "Hello World"))) + (assert (= tlib.SECRET-MESSAGE "Hello World"))) diff --git a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy new file mode 100644 index 0000000..cb539e3 --- /dev/null +++ b/tests/native_tests/mangling.hy @@ -0,0 +1,127 @@ +;; Copyright 2018 the authors. +;; This file is part of Hy, which is free software licensed under the Expat +;; license. See the LICENSE. + + +(import [hy._compat [PY3]]) + + +(defn test-hyphen [] + (setv a-b 1) + (assert (= a-b 1)) + (assert (= a_b 1)) + (setv -a-_b- 2) + (assert (= -a-_b- 2)) + (assert (= -a--b- 2)) + (assert (= -a__b- 2)) + (setv -_- 3) + (assert (= -_- 3)) + (assert (= --- 3)) + (assert (= ___ 3))) + + +(defn test-underscore-number [] + (setv _42 3) + (assert (= _42 3)) + (assert (!= _42 -42)) + (assert (not (in "_hyx_42" (locals))))) + + +(defn test-question-mark [] + (setv foo? "nachos") + (assert (= foo? "nachos")) + (assert (= is_foo "nachos")) + (setv ___ab_cd? "tacos") + (assert (= ___ab_cd? 
"tacos")) + (assert (= ___is_ab_cd "tacos"))) + + +(defn test-py-forbidden-ascii [] + + (setv # "no comment") + (assert (= # "no comment")) + (if PY3 + (assert (= hyx_Δnumber_signΔ "no comment")) + (assert (= hyx_Xnumber_signX "no comment"))) + + (setv $ "dosh") + (assert (= $ "dosh")) + (if PY3 + (assert (= hyx_Δdollar_signΔ "dosh")) + (assert (= hyx_Xdollar_signX "dosh")))) + + +(defn test-basic-multilingual-plane [] + (setv ♥ "love" + ⚘ab "flower") + (assert (= (+ ⚘ab ♥) "flowerlove")) + (if PY3 + (assert (= (+ hyx_ΔflowerΔab hyx_Δblack_heart_suitΔ) "flowerlove")) + (assert (= (+ hyx_XflowerXab hyx_Xblack_heart_suitX) "flowerlove"))) + (setv ⚘-⚘ "doubleflower") + (assert (= ⚘-⚘ "doubleflower")) + (if PY3 + (assert (= hyx_ΔflowerΔ_ΔflowerΔ "doubleflower")) + (assert (= hyx_XflowerX_XflowerX "doubleflower"))) + (setv ⚘? "mystery") + (assert (= ⚘? "mystery")) + (if PY3 + (assert (= hyx_is_ΔflowerΔ "mystery")) + (assert (= hyx_is_XflowerX "mystery")))) + + +(defn test-higher-unicode [] + (setv 😂 "emoji") + (assert (= 😂 "emoji")) + (if PY3 + (assert (= hyx_Δface_with_tears_of_joyΔ "emoji")) + (assert (= hyx_XU1f602X "emoji")))) + + +(defn test-nameless-unicode [] + (setv  "private use") + (assert (=  "private use")) + (if PY3 + (assert (= hyx_ΔUe000Δ "private use")) + (assert (= hyx_XUe000X "private use")))) + + +(defn test-charname-with-hyphen [] + (setv a Date: Wed, 15 Nov 2017 15:43:46 -0800 Subject: [PATCH 03/14] Mangle names that coincide with Python keywords --- hy/_compat.py | 29 ++++++++++++++++++----------- hy/compiler.py | 26 ++++---------------------- tests/compilers/test_ast.py | 3 --- tests/native_tests/language.hy | 30 +++++++++++++++--------------- tests/native_tests/mangling.hy | 14 ++++++++++++++ 5 files changed, 51 insertions(+), 51 deletions(-) diff --git a/hy/_compat.py b/hy/_compat.py index fa3b8eb..60dfe6f 100644 --- a/hy/_compat.py +++ b/hy/_compat.py @@ -18,7 +18,7 @@ except ImportError: (x >> 8) & 0xff, (x >> 16) & 0xff, (x >> 24) & 0xff])) 
-import sys +import sys, keyword PY3 = sys.version_info[0] >= 3 PY35 = sys.version_info >= (3, 5) @@ -37,15 +37,22 @@ else: raise t(*args) def isidentifier(x): + if x in ('True', 'False', 'None', 'print'): + # `print` is special-cased here because Python 2's + # keyword.iskeyword will count it as a keyword, but we + # use the __future__ feature print_function, which makes + # it a non-keyword. + return True + if keyword.iskeyword(x): + return False if PY3: return x.isidentifier() - else: - if x.rstrip() != x: - return False - import tokenize as T - from StringIO import StringIO - try: - tokens = list(T.generate_tokens(StringIO(x).readline)) - except T.TokenError: - return False - return len(tokens) == 2 and tokens[0][0] == T.NAME + if x.rstrip() != x: + return False + import tokenize as T + from StringIO import StringIO + try: + tokens = list(T.generate_tokens(StringIO(x).readline)) + except T.TokenError: + return False + return len(tokens) == 2 and tokens[0][0] == T.NAME diff --git a/hy/compiler.py b/hy/compiler.py index 2d2bb47..0969966 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -61,20 +61,6 @@ def load_stdlib(): _stdlib[e] = module -# True, False and None included here since they -# are assignable in Python 2.* but become -# keywords in Python 3.* -def _is_hy_builtin(name, module_name): - extras = ['True', 'False', 'None'] - if name in extras or keyword.iskeyword(name): - return True - # for non-Hy modules, check for pre-existing name in - # _compile_table - if not module_name.startswith("hy."): - return name in _compile_table - return False - - _compile_table = {} _decoratables = (ast.FunctionDef, ast.ClassDef) if PY35: @@ -386,7 +372,6 @@ def ends_with_else(expr): class HyASTCompiler(object): def __init__(self, module_name): - self.allow_builtins = module_name.startswith("hy.core") self.anon_var_count = 0 self.imports = defaultdict(set) self.module_name = module_name @@ -1803,10 +1788,11 @@ class HyASTCompiler(object): def _compile_assign(self, name, 
result): str_name = "%s" % name - if (_is_hy_builtin(str_name, self.module_name) and - not self.allow_builtins): + if str_name in (["None"] + (["True", "False"] if PY3 else [])): + # Python 2 allows assigning to True and False, although + # this is rarely wise. raise HyTypeError(name, - "Can't assign to a builtin: `%s'" % str_name) + "Can't assign to `%s'" % str_name) result = self.compile(result) ld_name = self.compile(name) @@ -2082,8 +2068,6 @@ class HyASTCompiler(object): body += self._compile_assign(symb, docstring) body += body.expr_as_stmt() - allow_builtins = self.allow_builtins - self.allow_builtins = True if expressions and isinstance(expressions[0], HyList) \ and not isinstance(expressions[0], HyExpression): expr = expressions.pop(0) @@ -2095,8 +2079,6 @@ class HyASTCompiler(object): for expression in expressions: body += self.compile(rewire_init(macroexpand(expression, self))) - self.allow_builtins = allow_builtins - if not body.stmts: body += asty.Pass(expressions) diff --git a/tests/compilers/test_ast.py b/tests/compilers/test_ast.py index 0ee88cc..ead657a 100644 --- a/tests/compilers/test_ast.py +++ b/tests/compilers/test_ast.py @@ -596,13 +596,11 @@ def test_invalid_list_comprehension(): def test_bad_setv(): """Ensure setv handles error cases""" - cant_compile("(setv if* 1)") cant_compile("(setv (a b) [1 2])") def test_defn(): """Ensure that defn works correctly in various corner cases""" - cant_compile("(defn if* [] 1)") cant_compile("(defn \"hy\" [] 1)") cant_compile("(defn :hy [] 1)") can_compile("(defn &hy [] 1)") @@ -611,7 +609,6 @@ def test_defn(): def test_setv_builtins(): """Ensure that assigning to a builtin fails, unless in a class""" cant_compile("(setv None 42)") - cant_compile("(defn get [&rest args] 42)") can_compile("(defclass A [] (defn get [self] 42))") can_compile(""" (defclass A [] diff --git a/tests/native_tests/language.hy b/tests/native_tests/language.hy index 60936d8..a4f39dd 100644 --- a/tests/native_tests/language.hy +++ 
b/tests/native_tests/language.hy @@ -65,19 +65,19 @@ (defn test-setv-builtin [] - "NATIVE: test that setv doesn't work on builtins" - (try (eval '(setv False 1)) - (except [e [TypeError]] (assert (in "Can't assign to a builtin" (str e))))) - (try (eval '(setv True 0)) - (except [e [TypeError]] (assert (in "Can't assign to a builtin" (str e))))) + "NATIVE: test that setv doesn't work on names Python can't assign to + and that we can't mangle" (try (eval '(setv None 1)) - (except [e [TypeError]] (assert (in "Can't assign to a builtin" (str e))))) - (try (eval '(defn defclass [] (print "hello"))) - (except [e [TypeError]] (assert (in "Can't assign to a builtin" (str e))))) - (try (eval '(defn get [] (print "hello"))) - (except [e [TypeError]] (assert (in "Can't assign to a builtin" (str e))))) - (try (eval '(defn fn [] (print "hello"))) - (except [e [TypeError]] (assert (in "Can't assign to a builtin" (str e)))))) + (except [e [TypeError]] (assert (in "Can't assign to" (str e))))) + (try (eval '(defn None [] (print "hello"))) + (except [e [TypeError]] (assert (in "Can't assign to" (str e))))) + (when PY3 + (try (eval '(setv False 1)) + (except [e [TypeError]] (assert (in "Can't assign to" (str e))))) + (try (eval '(setv True 0)) + (except [e [TypeError]] (assert (in "Can't assign to" (str e))))) + (try (eval '(defn True [] (print "hello"))) + (except [e [TypeError]] (assert (in "Can't assign to" (str e))))))) (defn test-setv-pairs [] @@ -223,14 +223,14 @@ ; don't be fooled by constructs that look like else (setv s "") - (setv (get (globals) "else") True) + (setv else True) (for [x "abcde"] (+= s x) [else (+= s "_")]) (assert (= s "a_b_c_d_e_")) (setv s "") - (setv (get (globals) "else") True) + (setv else True) (with [(pytest.raises TypeError)] (for [x "abcde"] (+= s x) @@ -329,7 +329,7 @@ ; don't be fooled by constructs that look like else clauses (setv x 2) (setv a []) - (setv (get (globals) "else") True) + (setv else True) (while x (.append a x) (-= x 1) diff --git 
a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy index cb539e3..5ddca3b 100644 --- a/tests/native_tests/mangling.hy +++ b/tests/native_tests/mangling.hy @@ -115,6 +115,20 @@ (assert (= x "aabb"))) +(defn test-python-keyword [] + (setv if 3) + (assert (= if 3)) + (assert (= hyx_if 3))) + + +(defn test-operator [] + (setv + 3) + (assert (= + 3)) + (if PY3 + (assert (= hyx_Δplus_signΔ 3)) + (assert (= hyx_Xplus_signX 3)))) + + (defn test-late-mangling [] ; Mangling should only happen during compilation. (assert (!= 'foo? 'is_foo)) From 0c816f2e837e5c46bc49a1b7250843894f6ff65d Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 19 Nov 2017 08:35:20 -0800 Subject: [PATCH 04/14] Mangle keyword arguments --- hy/compiler.py | 9 +++++---- tests/native_tests/language.hy | 6 +----- tests/native_tests/mangling.hy | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/hy/compiler.py b/hy/compiler.py index 0969966..19e8bcb 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -503,10 +503,11 @@ class HyASTCompiler(object): compiled_value = self.compile(value) ret += compiled_value - # no unicode for py2 in ast names - keyword = str(expr[2:]) - if "-" in keyword and keyword != "-": - keyword = keyword.replace("-", "_") + keyword = expr[2:] + if not keyword: + raise HyTypeError(expr, "Can't call a function with the " + "empty keyword") + keyword = ast_str(keyword) keywords.append(asty.keyword( expr, arg=keyword, value=compiled_value.force_expr)) diff --git a/tests/native_tests/language.hy b/tests/native_tests/language.hy index a4f39dd..89f8e5d 100644 --- a/tests/native_tests/language.hy +++ b/tests/native_tests/language.hy @@ -1232,11 +1232,7 @@ (assert (= : :)) (assert (keyword? 
:)) (assert (!= : ":")) - (assert (= (name :) "")) - - (defn f [&kwargs kwargs] - (list (.items kwargs))) - (assert (= (f : 3) [(, "" 3)]))) + (assert (= (name :) ""))) (defn test-nested-if [] diff --git a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy index 5ddca3b..4351436 100644 --- a/tests/native_tests/mangling.hy +++ b/tests/native_tests/mangling.hy @@ -129,6 +129,27 @@ (assert (= hyx_Xplus_signX 3)))) +(defn test-keyword-args [] + + (defn f [a a-b foo? ☘] + [a a-b foo? ☘]) + (assert (= (f :foo? 3 :☘ 4 :a 1 :a-b 2) [1 2 3 4])) + (if PY3 + (assert (= (f :is_foo 3 :hyx_ΔshamrockΔ 4 :a 1 :a_b 2) [1 2 3 4])) + (assert (= (f :is_foo 3 :hyx_XshamrockX 4 :a 1 :a_b 2) [1 2 3 4]))) + + (defn g [&kwargs x] + x) + (setv sk (.format "hyx_{0}shamrock{0}" (if PY3 "Δ" "X"))) + (assert (= (g :foo? 3 :☘ 4 :a 1 :a-b 2) + {"a" 1 "a_b" 2 "is_foo" 3 sk 4})) + (if PY3 + (assert (= (g :is_foo 3 :hyx_ΔshamrockΔ 4 :a 1 :a_b 2) + {"a" 1 "a_b" 2 "is_foo" 3 sk 4})) + (assert (= (g :is_foo 3 :hyx_XshamrockX 4 :a 1 :a_b 2) + {"a" 1 "a_b" 2 "is_foo" 3 sk 4})))) + + (defn test-late-mangling [] ; Mangling should only happen during compilation. (assert (!= 'foo? 'is_foo)) From ccb3ba60922ad3f842c0dff4f6856fe7f2296d95 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Wed, 15 Nov 2017 16:17:28 -0800 Subject: [PATCH 05/14] Mangle macro names --- hy/macros.py | 8 ++++++-- tests/native_tests/mangling.hy | 9 +++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/hy/macros.py b/hy/macros.py index ffa5356..fe13213 100644 --- a/hy/macros.py +++ b/hy/macros.py @@ -6,6 +6,7 @@ from hy._compat import PY3 import hy.inspect from hy.models import replace_hy_obj, HyExpression, HySymbol from hy.lex.parser import hy_symbol_mangle +from hy._compat import str_type from hy.errors import HyTypeError, HyMacroExpansionError @@ -35,6 +36,7 @@ def macro(name): This function is called from the `defmacro` special form in the compiler. 
""" + name = hy_symbol_mangle(name) def _(fn): fn.__name__ = '({})'.format(name) try: @@ -95,14 +97,15 @@ def require(source_module, target_module, seen_names = set() if prefix: prefix += "." + assignments = {hy_symbol_mangle(str_type(k)): v for k, v in assignments.items()} for d in _hy_macros, _hy_tag: for name, macro in d[source_module].items(): seen_names.add(name) if all_macros: - d[target_module][prefix + name] = macro + d[target_module][hy_symbol_mangle(prefix + name)] = macro elif name in assignments: - d[target_module][prefix + assignments[name]] = macro + d[target_module][hy_symbol_mangle(prefix + assignments[name])] = macro if not all_macros: unseen = frozenset(assignments.keys()).difference(seen_names) @@ -184,6 +187,7 @@ def macroexpand_1(tree, compiler): opts = {} if isinstance(fn, HySymbol): + fn = hy_symbol_mangle(str_type(fn)) m = _hy_macros[compiler.module_name].get(fn) if m is None: m = _hy_macros[None].get(fn) diff --git a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy index 4351436..b83365e 100644 --- a/tests/native_tests/mangling.hy +++ b/tests/native_tests/mangling.hy @@ -106,6 +106,15 @@ (assert (= hyx_Xlatin_capital_letter_xXXskull_and_crossbonesX "treasure")))) +(defmacro m---x [form] + [form form]) +(defn test-macro [] + (setv x "") + (assert (= (m---x (do (+= x "a") 1)) [1 1])) + (assert (= (m___x (do (+= x "b") 2)) [2 2])) + (assert (= x "aabb"))) + + (deftag tm---x [form] [form form]) (defn test-tag-macro [] From 0c8c5dc830e3d0d6b1c88ec92fbb23e7286bd555 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Wed, 15 Nov 2017 17:21:01 -0800 Subject: [PATCH 06/14] Mangle special forms --- hy/compiler.py | 23 ++++++++++++++--------- hy/contrib/walk.hy | 3 ++- tests/native_tests/mangling.hy | 9 +++++++++ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/hy/compiler.py b/hy/compiler.py index 19e8bcb..349ceff 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -83,6 +83,8 @@ def builds(*types, **kwargs): def _dec(fn): 
for t in types: + if isinstance(t, string_types): + t = ast_str(t) _compile_table[t] = fn return fn return _dec @@ -415,6 +417,8 @@ class HyASTCompiler(object): return ret.stmts def compile_atom(self, atom_type, atom): + if isinstance(atom_type, string_types): + atom_type = ast_str(atom_type) if atom_type in _compile_table: # _compile_table[atom_type] is a method for compiling this # type of atom, so call it. If it has an extra parameter, @@ -1530,15 +1534,16 @@ class HyASTCompiler(object): values=[value.force_expr for value in values]) return ret - def _compile_compare_op_expression(self, expression): - ops = {"=": ast.Eq, "!=": ast.NotEq, - "<": ast.Lt, "<=": ast.LtE, - ">": ast.Gt, ">=": ast.GtE, - "is": ast.Is, "is-not": ast.IsNot, - "in": ast.In, "not-in": ast.NotIn} + ops = {"=": ast.Eq, "!=": ast.NotEq, + "<": ast.Lt, "<=": ast.LtE, + ">": ast.Gt, ">=": ast.GtE, + "is": ast.Is, "is-not": ast.IsNot, + "in": ast.In, "not-in": ast.NotIn} + ops = {ast_str(k): v for k, v in ops.items()} - inv = expression.pop(0) - ops = [ops[inv]() for _ in range(len(expression) - 1)] + def _compile_compare_op_expression(self, expression): + inv = ast_str(expression.pop(0)) + ops = [self.ops[inv]() for _ in range(len(expression) - 1)] e = expression[0] exprs, ret, _ = self._compile_collect(expression) @@ -2115,7 +2120,7 @@ class HyASTCompiler(object): compile_time_ns(self.module_name), self.module_name) return (self._compile_branch(expression[1:]) - if building == "eval-and-compile" + if building == "eval_and_compile" else Result()) @builds(HyCons) diff --git a/hy/contrib/walk.hy b/hy/contrib/walk.hy index d61bb62..709c252 100644 --- a/hy/contrib/walk.hy +++ b/hy/contrib/walk.hy @@ -7,6 +7,7 @@ [functools [partial]] [collections [OrderedDict]] [hy.macros [macroexpand :as mexpand]] + [hy.lex.parser [hy-symbol-mangle]] [hy.compiler [HyASTCompiler]]) (defn walk [inner outer form] @@ -257,7 +258,7 @@ Arguments without a header are under None. 
(= head 'defclass) (self.handle-defclass) (= head 'quasiquote) (self.+quote) ;; must be checked last! - (in head special-forms) (self.handle-special-form) + (in (hy-symbol-mangle (string head)) special-forms) (self.handle-special-form) ;; Not a special form. Traverse it like a coll (self.handle-coll))) diff --git a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy index b83365e..14c7e37 100644 --- a/tests/native_tests/mangling.hy +++ b/tests/native_tests/mangling.hy @@ -124,6 +124,15 @@ (assert (= x "aabb"))) +(defn test-special-form [] + (setv not-in 1) + ; We set the variable to make sure that if this test works, it's + ; because we're calling the special form instead of the shadow + ; function. + (assert (is (not-in 2 [1 2 3]) False)) + (assert (is (not_in 2 [1 2 3]) False))) + + (defn test-python-keyword [] (setv if 3) (assert (= if 3)) From ebc9bda7eecb07d156a5ff9867ac12bc6fd83bca Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 25 Feb 2018 16:42:19 -0800 Subject: [PATCH 07/14] Remove an obsolete test --- tests/native_tests/language.hy | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/native_tests/language.hy b/tests/native_tests/language.hy index 89f8e5d..8f99b1d 100644 --- a/tests/native_tests/language.hy +++ b/tests/native_tests/language.hy @@ -187,14 +187,6 @@ (assert (in "takes a parameter list as second" (str e)))))) -(defn test-alias-names-in-errors [] - "NATIVE: tests that native aliases show the correct names in errors" - (try (eval '(list-comp 1 2 3 4)) - (except [e [Exception]] (assert (in "list_comp" (str e))))) - (try (eval '(set-comp 1 2 3 4)) - (except [e [Exception]] (assert (in "set_comp" (str e)))))) - - (defn test-for-loop [] "NATIVE: test for loops" (setv count1 0 count2 0) From 85968e70dd0c734f8aa81b301f83b891eef387a0 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 4 Mar 2018 14:20:46 -0800 Subject: [PATCH 08/14] Add `mangle` and `unmangle` as core functions --- hy/__init__.py | 2 +- hy/cmdline.py | 6 
+++--- hy/compiler.py | 6 +++--- hy/contrib/walk.hy | 3 +-- hy/core/language.hy | 5 +++-- hy/extra/reserved.hy | 1 - hy/lex/parser.py | 14 +++++++++++--- hy/macros.py | 16 ++++++++-------- tests/native_tests/mangling.hy | 9 +++++++++ tests/native_tests/native_macros.hy | 9 ++++----- 10 files changed, 43 insertions(+), 28 deletions(-) diff --git a/hy/__init__.py b/hy/__init__.py index 6b986d1..42d3133 100644 --- a/hy/__init__.py +++ b/hy/__init__.py @@ -12,5 +12,5 @@ import hy.importer # NOQA # we import for side-effects. -from hy.core.language import read, read_str # NOQA +from hy.core.language import read, read_str, mangle, unmangle # NOQA from hy.importer import hy_eval as eval # NOQA diff --git a/hy/cmdline.py b/hy/cmdline.py index 95ec9c5..40fede5 100644 --- a/hy/cmdline.py +++ b/hy/cmdline.py @@ -16,7 +16,7 @@ import astor.code_gen import hy from hy.lex import LexException, PrematureEndOfInput -from hy.lex.parser import hy_symbol_mangle +from hy.lex.parser import mangle from hy.compiler import HyTypeError from hy.importer import (hy_eval, import_buffer_to_module, import_file_to_ast, import_file_to_hst, @@ -64,11 +64,11 @@ class HyREPL(code.InteractiveConsole): self.output_fn = output_fn else: if "." 
in output_fn: - parts = [hy_symbol_mangle(x) for x in output_fn.split(".")] + parts = [mangle(x) for x in output_fn.split(".")] module, f = '.'.join(parts[:-1]), parts[-1] self.output_fn = getattr(importlib.import_module(module), f) else: - self.output_fn = __builtins__[hy_symbol_mangle(output_fn)] + self.output_fn = __builtins__[mangle(output_fn)] code.InteractiveConsole.__init__(self, locals=locals, filename=filename) diff --git a/hy/compiler.py b/hy/compiler.py index 349ceff..bd19739 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -8,7 +8,7 @@ from hy.models import (HyObject, HyExpression, HyKeyword, HyInteger, HyComplex, HyDict, HyCons, wrap_value) from hy.errors import HyCompileError, HyTypeError -from hy.lex.parser import hy_symbol_mangle +from hy.lex.parser import mangle import hy.macros from hy._compat import ( @@ -70,7 +70,7 @@ if PY35: def ast_str(x, piecewise=False): if piecewise: return ".".join(ast_str(s) if s else "" for s in x.split(".")) - x = hy_symbol_mangle(str_type(x)) + x = mangle(x) return x if PY3 else x.encode('UTF8') @@ -2109,7 +2109,7 @@ class HyASTCompiler(object): "Trying to expand a tag macro using `{0}' instead " "of string".format(type(tag).__name__), ) - tag = HyString(hy_symbol_mangle(str(tag))).replace(tag) + tag = HyString(mangle(tag)).replace(tag) expr = tag_macroexpand(tag, expression.pop(0), self) return self.compile(expr) diff --git a/hy/contrib/walk.hy b/hy/contrib/walk.hy index 709c252..7ce171d 100644 --- a/hy/contrib/walk.hy +++ b/hy/contrib/walk.hy @@ -7,7 +7,6 @@ [functools [partial]] [collections [OrderedDict]] [hy.macros [macroexpand :as mexpand]] - [hy.lex.parser [hy-symbol-mangle]] [hy.compiler [HyASTCompiler]]) (defn walk [inner outer form] @@ -258,7 +257,7 @@ Arguments without a header are under None. (= head 'defclass) (self.handle-defclass) (= head 'quasiquote) (self.+quote) ;; must be checked last! 
- (in (hy-symbol-mangle (string head)) special-forms) (self.handle-special-form) + (in (mangle head) special-forms) (self.handle-special-form) ;; Not a special form. Traverse it like a coll (self.handle-coll))) diff --git a/hy/core/language.hy b/hy/core/language.hy index 366d137..ca0724a 100644 --- a/hy/core/language.hy +++ b/hy/core/language.hy @@ -18,6 +18,7 @@ (import [hy._compat [long-type]]) ; long for python2, int for python3 (import [hy.models [HyCons HySymbol HyKeyword]]) (import [hy.lex [LexException PrematureEndOfInput tokenize]]) +(import [hy.lex.parser [mangle unmangle]]) (import [hy.compiler [HyASTCompiler spoof-positions]]) (import [hy.importer [hy-eval :as eval]]) @@ -495,7 +496,7 @@ Even objects with the __name__ magic will work." filter flatten float? fraction gensym group-by identity inc input instance? integer integer? integer-char? interleave interpose islice iterable? iterate iterator? juxt keyword keyword? last list* macroexpand - macroexpand-1 map merge-with multicombinations name neg? none? nth + macroexpand-1 mangle map merge-with multicombinations name neg? none? nth numeric? odd? partition permutations pos? product range read read-str remove repeat repeatedly rest reduce second some string string? symbol? - take take-nth take-while xor tee zero? zip zip-longest]) + take take-nth take-while unmangle xor tee zero? zip zip-longest]) diff --git a/hy/extra/reserved.hy b/hy/extra/reserved.hy index 0cd1fb5..d7ae23c 100644 --- a/hy/extra/reserved.hy +++ b/hy/extra/reserved.hy @@ -13,7 +13,6 @@ The result of the first call is cached." (global _cache) (if (is _cache None) (do - (setv unmangle (. 
sys.modules ["hy.lex.parser"] hy_symbol_unmangle)) (setv _cache (frozenset (map unmangle (+ hy.core.language.EXPORTS hy.core.shadow.EXPORTS diff --git a/hy/lex/parser.py b/hy/lex/parser.py index 82cbd85..238f57a 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -25,9 +25,14 @@ pg = ParserGenerator( mangle_delim = 'Δ' if PY3 else 'X' -def hy_symbol_mangle(s): +def mangle(s): + """Stringify the argument and convert it to a valid Python identifier + according to Hy's mangling rules.""" + assert s + s = str_type(s) + s = s.replace("-", "_") s2 = s.lstrip('_') leading_underscores = '_' * (len(s) - len(s2)) @@ -53,8 +58,11 @@ def hy_symbol_mangle(s): return s -def hy_symbol_unmangle(s): - # hy_symbol_mangle is one-way, so this won't round-trip. +def unmangle(s): + """Stringify the argument and try to convert it to a pretty unmangled + form. This may not round-trip, because different Hy symbol names can + mangle to the same Python identifier.""" + s = str_type(s) s2 = s.lstrip('_') diff --git a/hy/macros.py b/hy/macros.py index fe13213..110c37d 100644 --- a/hy/macros.py +++ b/hy/macros.py @@ -5,7 +5,7 @@ from hy._compat import PY3 import hy.inspect from hy.models import replace_hy_obj, HyExpression, HySymbol -from hy.lex.parser import hy_symbol_mangle +from hy.lex.parser import mangle from hy._compat import str_type from hy.errors import HyTypeError, HyMacroExpansionError @@ -36,7 +36,7 @@ def macro(name): This function is called from the `defmacro` special form in the compiler. 
""" - name = hy_symbol_mangle(name) + name = mangle(name) def _(fn): fn.__name__ = '({})'.format(name) try: @@ -67,14 +67,14 @@ def tag(name): """ def _(fn): - _name = hy_symbol_mangle('#{}'.format(name)) + _name = mangle('#{}'.format(name)) if not PY3: _name = _name.encode('UTF-8') fn.__name__ = _name module_name = fn.__module__ if module_name.startswith("hy.core"): module_name = None - _hy_tag[module_name][hy_symbol_mangle(name)] = fn + _hy_tag[module_name][mangle(name)] = fn return fn return _ @@ -97,15 +97,15 @@ def require(source_module, target_module, seen_names = set() if prefix: prefix += "." - assignments = {hy_symbol_mangle(str_type(k)): v for k, v in assignments.items()} + assignments = {mangle(str_type(k)): v for k, v in assignments.items()} for d in _hy_macros, _hy_tag: for name, macro in d[source_module].items(): seen_names.add(name) if all_macros: - d[target_module][hy_symbol_mangle(prefix + name)] = macro + d[target_module][mangle(prefix + name)] = macro elif name in assignments: - d[target_module][hy_symbol_mangle(prefix + assignments[name])] = macro + d[target_module][mangle(prefix + assignments[name])] = macro if not all_macros: unseen = frozenset(assignments.keys()).difference(seen_names) @@ -187,7 +187,7 @@ def macroexpand_1(tree, compiler): opts = {} if isinstance(fn, HySymbol): - fn = hy_symbol_mangle(str_type(fn)) + fn = mangle(str_type(fn)) m = _hy_macros[compiler.module_name].get(fn) if m is None: m = _hy_macros[None].get(fn) diff --git a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy index 14c7e37..032e205 100644 --- a/tests/native_tests/mangling.hy +++ b/tests/native_tests/mangling.hy @@ -178,3 +178,12 @@ (setv ~sym 10) [foo? is_foo]))) (assert (= out [10 10]))) + + +(defn test-functions [] + (for [[a b] [ + ["---ab-cd?" 
"___is_ab_cd"] + ["if" "hyx_if"] + ["⚘-⚘" (if PY3 "hyx_ΔflowerΔ_ΔflowerΔ" "hyx_XflowerX_XflowerX")]]] + (assert (= (mangle a) b)) + (assert (= (unmangle b) a)))) diff --git a/tests/native_tests/native_macros.hy b/tests/native_tests/native_macros.hy index 9d09b73..0eb7d03 100644 --- a/tests/native_tests/native_macros.hy +++ b/tests/native_tests/native_macros.hy @@ -3,7 +3,6 @@ ;; license. See the LICENSE. (import [hy.errors [HyTypeError]]) -(import [hy.lex.parser [hy-symbol-mangle]]) (defmacro rev [&rest body] "Execute the `body` statements in reverse" @@ -164,8 +163,8 @@ (setv s1 (to_source _ast1)) (setv s2 (to_source _ast2)) ;; and make sure there is something new that starts with _;G| - (assert (in (hy-symbol-mangle "_;G|") s1)) - (assert (in (hy-symbol-mangle "_;G|") s2)) + (assert (in (mangle "_;G|") s1)) + (assert (in (mangle "_;G|") s2)) ;; but make sure the two don't match each other (assert (not (= s1 s2)))) @@ -189,8 +188,8 @@ (setv _ast2 (import_buffer_to_ast macro1 "foo")) (setv s1 (to_source _ast1)) (setv s2 (to_source _ast2)) - (assert (in (hy-symbol-mangle "_;a|") s1)) - (assert (in (hy-symbol-mangle "_;a|") s2)) + (assert (in (mangle "_;a|") s1)) + (assert (in (mangle "_;a|") s2)) (assert (not (= s1 s2)))) (defn test-defmacro-g! [] From 3c97d2982cc2bfcc959d9d20a4075297a2b015df Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 4 Mar 2018 14:44:56 -0800 Subject: [PATCH 09/14] Use `*1` instead of `_` for REPL history `_`, as a variable, is now the shadow subtraction operator. --- NEWS.rst | 4 ++++ hy/cmdline.py | 4 ++-- tests/test_bin.py | 5 +++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/NEWS.rst b/NEWS.rst index a1b503f..d41a3d0 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -5,6 +5,10 @@ Unreleased Other Breaking Changes ------------------------------ +* `_` and `-` are now equivalent as single-character names + + * The REPL history variable `_` is now `*1` + * Non-shadow unary `=`, `is`, `<`, etc. 
now evaluate their argument instead of ignoring it. This change increases consistency a bit and makes accidental unary uses easier to notice. diff --git a/hy/cmdline.py b/hy/cmdline.py index 40fede5..75fe09a 100644 --- a/hy/cmdline.py +++ b/hy/cmdline.py @@ -112,8 +112,8 @@ class HyREPL(code.InteractiveConsole): if value is not None: # Make the last non-None value available to - # the user as `_`. - self.locals['_'] = value + # the user as `*1`. + self.locals[mangle("*1")] = value # Print the value. try: output = self.output_fn(value) diff --git a/tests/test_bin.py b/tests/test_bin.py index 33701ea..94411cc 100644 --- a/tests/test_bin.py +++ b/tests/test_bin.py @@ -74,6 +74,11 @@ def test_bin_hy_stdin_multiline(): assert "'abcd'" in output +def test_bin_hy_history(): + output, _ = run_cmd("hy", '(+ "a" "b")\n(+ *1 "y" "z")') + assert "'abyz'" in output + + def test_bin_hy_stdin_comments(): _, err_empty = run_cmd("hy", '') From 6875ae0e3f1aeb6b18673dc53655c4aba42807f7 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 4 Mar 2018 14:52:30 -0800 Subject: [PATCH 10/14] Replace `hyify` with `unmangle` --- hy/core/language.hy | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/hy/core/language.hy b/hy/core/language.hy index ca0724a..75fe799 100644 --- a/hy/core/language.hy +++ b/hy/core/language.hy @@ -454,20 +454,16 @@ as EOF (defaults to an empty string)." "Reads and tokenizes first line of `input`." (read :from-file (StringIO input))) -(defn hyify [text] - "Convert `text` to match hy identifier." - (.replace (string text) "_" "-")) - (defn keyword [value] "Create a keyword from `value`. Strings numbers and even objects with the __name__ magic will work." (if (and (string? value) (value.startswith HyKeyword.PREFIX)) - (hyify value) + (unmangle value) (if (string? 
value) - (HyKeyword (+ ":" (hyify value))) + (HyKeyword (+ ":" (unmangle value))) (try - (hyify (.__name__ value)) + (unmangle (.__name__ value)) (except [] (HyKeyword (+ ":" (string value)))))))) (defn name [value] @@ -476,11 +472,11 @@ Strings numbers and even objects with the __name__ magic will work." Keyword special character will be stripped. String will be used as is. Even objects with the __name__ magic will work." (if (and (string? value) (value.startswith HyKeyword.PREFIX)) - (hyify (cut value 2)) + (unmangle (cut value 2)) (if (string? value) - (hyify value) + (unmangle value) (try - (hyify (. value __name__)) + (unmangle (. value __name__)) (except [] (string value)))))) (defn xor [a b] From 4d77dd0d4098418d6ab803d0ddff1d2dccb75645 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 4 Mar 2018 15:15:24 -0800 Subject: [PATCH 11/14] Spin off syntax documentation from api.rst --- docs/language/api.rst | 138 +-------------------------------------- docs/language/index.rst | 1 + docs/language/syntax.rst | 124 +++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 137 deletions(-) create mode 100644 docs/language/syntax.rst diff --git a/docs/language/api.rst b/docs/language/api.rst index 2c4a0ac..1fe0f83 100644 --- a/docs/language/api.rst +++ b/docs/language/api.rst @@ -1,142 +1,6 @@ ================= -Hy (the language) -================= - -.. warning:: - This is incomplete; please consider contributing to the documentation - effort. - - -Theory of Hy -============ - -Hy maintains, over everything else, 100% compatibility in both directions -with Python itself. All Hy code follows a few simple rules. Memorize -this, as it's going to come in handy. - -These rules help ensure that Hy code is idiomatic and interfaceable in both -languages. - - - * Symbols in earmuffs will be translated to the upper-cased version of that - string. For example, ``foo`` will become ``FOO``. 
- - * UTF-8 entities will be encoded using - `punycode `_ and prefixed with - ``hy_``. For instance, ``⚘`` will become ``hy_w7h``, ``♥`` will become - ``hy_g6h``, and ``i♥u`` will become ``hy_iu_t0x``. - - * Symbols that contain dashes will have them replaced with underscores. For - example, ``render-template`` will become ``render_template``. This means - that symbols with dashes will shadow their underscore equivalents, and vice - versa. - -Notes on Syntax -=============== - -numeric literals ----------------- - -In addition to regular numbers, standard notation from Python 3 for non-base 10 -integers is used. ``0x`` for Hex, ``0o`` for Octal, ``0b`` for Binary. - -.. code-block:: clj - - (print 0x80 0b11101 0o102 30) - -Underscores and commas can appear anywhere in a numeric literal except the very -beginning. They have no effect on the value of the literal, but they're useful -for visually separating digits. - -.. code-block:: clj - - (print 10,000,000,000 10_000_000_000) - -Unlike Python, Hy provides literal forms for NaN and infinity: ``NaN``, -``Inf``, and ``-Inf``. - -string literals ---------------- - -Hy allows double-quoted strings (e.g., ``"hello"``), but not single-quoted -strings like Python. The single-quote character ``'`` is reserved for -preventing the evaluation of a form (e.g., ``'(+ 1 1)``), as in most Lisps. - -Python's so-called triple-quoted strings (e.g., ``'''hello'''`` and -``"""hello"""``) aren't supported. However, in Hy, unlike Python, any string -literal can contain newlines. Furthermore, Hy supports an alternative form of -string literal called a "bracket string" similar to Lua's long brackets. -Bracket strings have customizable delimiters, like the here-documents of other -languages. A bracket string begins with ``#[FOO[`` and ends with ``]FOO]``, -where ``FOO`` is any string not containing ``[`` or ``]``, including the empty -string. 
For example:: - - => (print #[["That's very kind of yuo [sic]" Tom wrote back.]]) - "That's very kind of yuo [sic]" Tom wrote back. - => (print #[==[1 + 1 = 2]==]) - 1 + 1 = 2 - -A bracket string can contain newlines, but if it begins with one, the newline -is removed, so you can begin the content of a bracket string on the line -following the opening delimiter with no effect on the content. Any leading -newlines past the first are preserved. - -Plain string literals support :ref:`a variety of backslash escapes -<py:strings>`. To create a "raw string" that interprets all backslashes -literally, prefix the string with ``r``, as in ``r"slash\not"``. Bracket -strings are always raw strings and don't allow the ``r`` prefix. - -Whether running under Python 2 or Python 3, Hy treats all string literals as -sequences of Unicode characters by default, and allows you to prefix a plain -string literal (but not a bracket string) with ``b`` to treat it as a sequence -of bytes. So when running under Python 3, Hy translates ``"foo"`` and -``b"foo"`` to the identical Python code, but when running under Python 2, -``"foo"`` is translated to ``u"foo"`` and ``b"foo"`` is translated to -``"foo"``. - -.. _syntax-keywords: - -keywords --------- - -An identifier headed by a colon, such as ``:foo``, is a keyword. Keywords -evaluate to a string preceded by the Unicode non-character code point U+FDD0, -like ``"\ufdd0:foo"``, so ``:foo`` and ``":foo"`` aren't equal. However, if a -literal keyword appears in a function call, it's used to indicate a keyword -argument rather than passed in as a value. For example, ``(f :foo 3)`` calls -the function ``f`` with the keyword argument named ``foo`` set to ``3``. Hence, -trying to call a function on a literal keyword may fail: ``(f :foo)`` yields -the error ``Keyword argument :foo needs a value``. To avoid this, you can quote -the keyword, as in ``(f ':foo)``, or use it as the value of another keyword -argument, as in ``(f :arg :foo)``.
- -discard prefix --------------- - -Hy supports the Extensible Data Notation discard prefix, like Clojure. -Any form prefixed with ``#_`` is discarded instead of compiled. -This completely removes the form so it doesn't evaluate to anything, -not even None. -It's often more useful than linewise comments for commenting out a -form, because it respects code structure even when part of another -form is on the same line. For example: - -.. code-block:: clj - - => (print "Hy" "cruel" "World!") - Hy cruel World! - => (print "Hy" #_"cruel" "World!") - Hy World! - => (+ 1 1 (print "Math is hard!")) - Math is hard! - Traceback (most recent call last): - ... - TypeError: unsupported operand type(s) for +: 'int' and 'NoneType' - => (+ 1 1 #_(print "Math is hard!")) - 2 - Built-Ins -========= +================= Hy features a number of special forms that are used to help generate correct Python AST. The following are "special" forms, which may have diff --git a/docs/language/index.rst b/docs/language/index.rst index 672063a..1a73bd2 100644 --- a/docs/language/index.rst +++ b/docs/language/index.rst @@ -9,6 +9,7 @@ Contents: cli interop + syntax api core internals diff --git a/docs/language/syntax.rst b/docs/language/syntax.rst new file mode 100644 index 0000000..149265a --- /dev/null +++ b/docs/language/syntax.rst @@ -0,0 +1,124 @@ +============== +Syntax +============== + +Hy maintains, over everything else, 100% compatibility in both directions +with Python itself. All Hy code follows a few simple rules. Memorize +this, as it's going to come in handy. + +These rules help ensure that Hy code is idiomatic and interfaceable in both +languages. + + * Symbols in earmuffs will be translated to the upper-cased version of that + string. For example, ``foo`` will become ``FOO``. + + * UTF-8 entities will be encoded using + `punycode `_ and prefixed with + ``hy_``. For instance, ``⚘`` will become ``hy_w7h``, ``♥`` will become + ``hy_g6h``, and ``i♥u`` will become ``hy_iu_t0x``. 
+ + * Symbols that contain dashes will have them replaced with underscores. For + example, ``render-template`` will become ``render_template``. This means + that symbols with dashes will shadow their underscore equivalents, and vice + versa. + +numeric literals +---------------- + +In addition to regular numbers, standard notation from Python 3 for non-base 10 +integers is used. ``0x`` for Hex, ``0o`` for Octal, ``0b`` for Binary. + +.. code-block:: clj + + (print 0x80 0b11101 0o102 30) + +Underscores and commas can appear anywhere in a numeric literal except the very +beginning. They have no effect on the value of the literal, but they're useful +for visually separating digits. + +.. code-block:: clj + + (print 10,000,000,000 10_000_000_000) + +Unlike Python, Hy provides literal forms for NaN and infinity: ``NaN``, +``Inf``, and ``-Inf``. + +string literals +--------------- + +Hy allows double-quoted strings (e.g., ``"hello"``), but not single-quoted +strings like Python. The single-quote character ``'`` is reserved for +preventing the evaluation of a form (e.g., ``'(+ 1 1)``), as in most Lisps. + +Python's so-called triple-quoted strings (e.g., ``'''hello'''`` and +``"""hello"""``) aren't supported. However, in Hy, unlike Python, any string +literal can contain newlines. Furthermore, Hy supports an alternative form of +string literal called a "bracket string" similar to Lua's long brackets. +Bracket strings have customizable delimiters, like the here-documents of other +languages. A bracket string begins with ``#[FOO[`` and ends with ``]FOO]``, +where ``FOO`` is any string not containing ``[`` or ``]``, including the empty +string. For example:: + + => (print #[["That's very kind of yuo [sic]" Tom wrote back.]]) + "That's very kind of yuo [sic]" Tom wrote back. 
+ => (print #[==[1 + 1 = 2]==]) + 1 + 1 = 2 + +A bracket string can contain newlines, but if it begins with one, the newline +is removed, so you can begin the content of a bracket string on the line +following the opening delimiter with no effect on the content. Any leading +newlines past the first are preserved. + +Plain string literals support :ref:`a variety of backslash escapes +<py:strings>`. To create a "raw string" that interprets all backslashes +literally, prefix the string with ``r``, as in ``r"slash\not"``. Bracket +strings are always raw strings and don't allow the ``r`` prefix. + +Whether running under Python 2 or Python 3, Hy treats all string literals as +sequences of Unicode characters by default, and allows you to prefix a plain +string literal (but not a bracket string) with ``b`` to treat it as a sequence +of bytes. So when running under Python 3, Hy translates ``"foo"`` and +``b"foo"`` to the identical Python code, but when running under Python 2, +``"foo"`` is translated to ``u"foo"`` and ``b"foo"`` is translated to +``"foo"``. + +.. _syntax-keywords: + +keywords +-------- + +An identifier headed by a colon, such as ``:foo``, is a keyword. Keywords +evaluate to a string preceded by the Unicode non-character code point U+FDD0, +like ``"\ufdd0:foo"``, so ``:foo`` and ``":foo"`` aren't equal. However, if a +literal keyword appears in a function call, it's used to indicate a keyword +argument rather than passed in as a value. For example, ``(f :foo 3)`` calls +the function ``f`` with the keyword argument named ``foo`` set to ``3``. Hence, +trying to call a function on a literal keyword may fail: ``(f :foo)`` yields +the error ``Keyword argument :foo needs a value``. To avoid this, you can quote +the keyword, as in ``(f ':foo)``, or use it as the value of another keyword +argument, as in ``(f :arg :foo)``. + +discard prefix +-------------- + +Hy supports the Extensible Data Notation discard prefix, like Clojure.
+Any form prefixed with ``#_`` is discarded instead of compiled. +This completely removes the form so it doesn't evaluate to anything, +not even None. +It's often more useful than linewise comments for commenting out a +form, because it respects code structure even when part of another +form is on the same line. For example: + +.. code-block:: clj + + => (print "Hy" "cruel" "World!") + Hy cruel World! + => (print "Hy" #_"cruel" "World!") + Hy World! + => (+ 1 1 (print "Math is hard!")) + Math is hard! + Traceback (most recent call last): + ... + TypeError: unsupported operand type(s) for +: 'int' and 'NoneType' + => (+ 1 1 #_(print "Math is hard!")) + 2 From eda0b89f678c4c12eba0f3aa6a8bc933a83b4cf0 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 4 Mar 2018 16:39:54 -0800 Subject: [PATCH 12/14] Document mangling --- docs/language/core.rst | 30 ++++++++++++++++ docs/language/internals.rst | 21 ++++-------- docs/language/interop.rst | 41 ++++------------------ docs/language/syntax.rst | 68 +++++++++++++++++++++++++++---------- 4 files changed, 92 insertions(+), 68 deletions(-) diff --git a/docs/language/core.rst b/docs/language/core.rst index 8838153..3b089ea 100644 --- a/docs/language/core.rst +++ b/docs/language/core.rst @@ -699,6 +699,20 @@ Returns the single step macro expansion of *form*. HySymbol('e'), HySymbol('f')])]) +.. _mangle-fn: + +mangle +------ + +Usage: ``(mangle x)`` + +Stringify the input and translate it according to :ref:`Hy's mangling rules +<mangling>`. + +.. code-block:: hylang + + => (mangle "foo-bar") + 'foo_bar' .. _merge-with-fn: @@ -1431,6 +1445,22 @@ Returns an iterator from *coll* as long as *pred* returns ``True``. => (list (take-while neg? [ 1 2 3 -4 5])) [] +.. _unmangle-fn: + +unmangle +-------- + +Usage: ``(unmangle x)`` + +Stringify the input and return a string that would :ref:`mangle <mangle-fn>` to +it. Note that this isn't a one-to-one operation, and nor is ``mangle``, so +``mangle`` and ``unmangle`` don't always round-trip. + +..
code-block:: hylang + + => (unmangle "foo_bar") + 'foo-bar' + Included itertools ================== diff --git a/docs/language/internals.rst b/docs/language/internals.rst index 48a4985..155ab0a 100644 --- a/docs/language/internals.rst +++ b/docs/language/internals.rst @@ -157,17 +157,8 @@ HySymbol ``hy.models.HySymbol`` is the model used to represent symbols in the Hy language. It inherits :ref:`HyString`. -``HySymbol`` objects are mangled in the parsing phase, to help Python -interoperability: - - - Symbols surrounded by asterisks (``*``) are turned into uppercase; - - Dashes (``-``) are turned into underscores (``_``); - - One trailing question mark (``?``) is turned into a leading ``is_``. - -Caveat: as the mangling is done during the parsing phase, it is possible -to programmatically generate HySymbols that can't be generated with Hy -source code. Such a mechanism is used by :ref:`gensym` to generate -"uninterned" symbols. +Symbols are :ref:`mangled <mangling>` when they are compiled +to Python variable names. .. _hykeyword: @@ -340,7 +331,7 @@ Since they have no "value" to Python, this makes working in Hy hard, since doing something like ``(print (if True True False))`` is not just common, it's expected. -As a result, we auto-mangle things using a ``Result`` object, where we offer +As a result, we reconfigure things using a ``Result`` object, where we offer up any ``ast.stmt`` that need to get run, and a single ``ast.expr`` that can be used to get the value of whatever was just run. Hy does this by forcing assignment to things while running.
@@ -352,11 +343,11 @@ As example, the Hy:: Will turn into:: if True: - _mangled_name_here = True + _temp_name_here = True else: - _mangled_name_here = False + _temp_name_here = False - print _mangled_name_here + print _temp_name_here OK, that was a bit of a lie, since we actually turn that statement diff --git a/docs/language/interop.rst b/docs/language/interop.rst index df34016..34d61ea 100644 --- a/docs/language/interop.rst +++ b/docs/language/interop.rst @@ -8,6 +8,12 @@ Hy <-> Python interop Despite being a Lisp, Hy aims to be fully compatible with Python. That means every Python module or package can be imported in Hy code, and vice versa. +:ref:`Mangling <mangling>` allows variable names to be spelled differently in +Hy and Python. For example, Python's ``str.format_map`` can be written +``str.format-map`` in Hy, and a Hy function named ``valid?`` would be called +``is_valid`` in Python. In Python, you can import Hy's core functions +``mangle`` and ``unmangle`` directly from the ``hy`` package. + Using Python from Hy ==================== @@ -27,41 +33,6 @@ You can use it in Hy: You can also import ``.pyc`` bytecode files, of course. -A quick note about mangling --------- - -In Python, snake_case is used by convention. Lisp dialects tend to use dashes -instead of underscores, so Hy does some magic to give you more pleasant names. - -In the same way, ``UPPERCASE_NAMES`` from Python can be used ``*with-earmuffs*`` -instead. - -You can use either the original names or the new ones. - -Imagine ``example.py``:: - - def function_with_a_long_name(): - print(42) - - FOO = "bar" - -Then, in Hy: - -.. code-block:: clj - - (import example) - (.function-with-a-long-name example) ; prints "42" - (.function_with_a_long_name example) ; also prints "42" - - (print (. example *foo*)) ; prints "bar" - (print (. example FOO)) ; also prints "bar" - -.. warning:: - Mangling isn’t that simple; there is more to discuss about it, yet it doesn’t - belong in this section. -..
TODO: link to mangling section, when it is done - - Using Hy from Python ==================== diff --git a/docs/language/syntax.rst b/docs/language/syntax.rst index 149265a..deed2cb 100644 --- a/docs/language/syntax.rst +++ b/docs/language/syntax.rst @@ -2,25 +2,10 @@ Syntax ============== -Hy maintains, over everything else, 100% compatibility in both directions -with Python itself. All Hy code follows a few simple rules. Memorize -this, as it's going to come in handy. +identifiers +----------- -These rules help ensure that Hy code is idiomatic and interfaceable in both -languages. - - * Symbols in earmuffs will be translated to the upper-cased version of that - string. For example, ``foo`` will become ``FOO``. - - * UTF-8 entities will be encoded using - `punycode `_ and prefixed with - ``hy_``. For instance, ``⚘`` will become ``hy_w7h``, ``♥`` will become - ``hy_g6h``, and ``i♥u`` will become ``hy_iu_t0x``. - - * Symbols that contain dashes will have them replaced with underscores. For - example, ``render-template`` will become ``render_template``. This means - that symbols with dashes will shadow their underscore equivalents, and vice - versa. +An identifier consists of a nonempty sequence of Unicode characters that are neither whitespace nor any of the following: ``( ) [ ] { } ' "``. Hy first tries to parse each identifier into a numeric literal, then into a keyword if that fails, and finally into a symbol if that fails. numeric literals ---------------- @@ -98,6 +83,53 @@ the error ``Keyword argument :foo needs a value``. To avoid this, you can quote the keyword, as in ``(f ':foo)``, or use it as the value of another keyword argument, as in ``(f :arg :foo)``. +.. _mangling: + +symbols +------- + +Symbols are identifiers that are neither legal numeric literals nor legal +keywords. In most contexts, symbols are compiled to Python variable names. Some +example symbols are ``hello``, ``+++``, ``3fiddy``, ``$40``, ``just✈wrong``, +and ``🦑``.
+ +Since the rules for Hy symbols are much more permissive than the rules for +Python identifiers, Hy uses a mangling algorithm to convert its own names to +Python-legal names. The rules are: + +- Convert all hyphens (``-``) to underscores (``_``). Thus, ``foo-bar`` becomes + ``foo_bar``. +- If the name ends with ``?``, remove it and prepend ``is_``. Thus, ``tasty?`` + becomes ``is_tasty``. +- If the name still isn't Python-legal, make the following changes. A name + could be Python-illegal because it contains a character that's never legal in + a Python name, it contains a character that's illegal in that position, or + it's equal to a Python reserved word. + + - Prepend ``hyx_`` to the name. + - Replace each illegal character with ``ΔfooΔ`` (or on Python 2, ``XfooX``), + where ``foo`` is the Unicode character name in lowercase, with spaces + replaced by underscores and hyphens replaced by ``H``. Replace ``Δ`` itself + (or on Python 2, ``X``) the same way. If the character doesn't have a name, + use ``U`` followed by its code point in lowercase hexadecimal. + + Thus, ``green☘`` becomes ``hyx_greenΔshamrockΔ`` and ``if`` becomes + ``hyx_if``. + +- Finally, any added ``hyx_`` or ``is_`` is placed after any leading + underscores, because leading underscores have special significance to Python. + Thus, ``_tasty?`` becomes ``_is_tasty`` instead of ``is__tasty``. + +Mangling isn't something you should have to think about often, but you may see +mangled names in error messages, the output of ``hy2py``, etc. A catch to be +aware of is that mangling, as well as the inverse "unmangling" operation +offered by the ``unmangle`` function, isn't one-to-one. Two different symbols +can mangle to the same string and hence compile to the same Python variable. +The chief practical consequence of this is that ``-`` and ``_`` are +interchangeable in all symbol names, so you shouldn't assign to the +one-character name ``_``, or else you'll interfere with certain uses of +subtraction.
+ discard prefix -------------- From ca06294c186a8660456942422c23dcba8bacb743 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 4 Mar 2018 16:42:38 -0800 Subject: [PATCH 13/14] Update NEWS --- NEWS.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS.rst b/NEWS.rst index d41a3d0..c8b5923 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -5,7 +5,9 @@ Unreleased Other Breaking Changes ------------------------------ -* `_` and `-` are now equivalent as single-character names +* Mangling rules have been overhauled, such that mangled names + are always legal Python identifiers +* `_` and `-` are now equivalent even as single-character names * The REPL history variable `_` is now `*1` @@ -13,6 +15,10 @@ Other Breaking Changes instead of ignoring it. This change increases consistency a bit and makes accidental unary uses easier to notice. +New Features +------------------------------ +* Added `mangle` and `unmangle` as core functions + Bug Fixes ------------------------------ * Fix `(return)` so it works correctly to exit a Python 2 generator From 4c5dea0756f88cbc0858135b9ac91c8954ce5cc9 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sun, 4 Mar 2018 16:46:48 -0800 Subject: [PATCH 14/14] Use io.StringIO instead of the StringIO module --- hy/_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hy/_compat.py b/hy/_compat.py index 60dfe6f..3c5b7d9 100644 --- a/hy/_compat.py +++ b/hy/_compat.py @@ -50,7 +50,7 @@ def isidentifier(x): if x.rstrip() != x: return False import tokenize as T - from StringIO import StringIO + from io import StringIO try: tokens = list(T.generate_tokens(StringIO(x).readline)) except T.TokenError: