def isidentifier(x):
    """Return True if the string `x` is a valid Python identifier.

    On Python 3 this defers to `str.isidentifier`. On Python 2, which
    has no such method, `x` is run through the `tokenize` module and
    accepted only when it lexes as exactly two tokens, the first of
    which is a NAME.
    """
    if PY3:
        return x.isidentifier()

    # Reject trailing whitespace up front. (Presumably the tokenizer
    # would otherwise skip over it and accept e.g. "foo " -- confirm.)
    if x.rstrip() != x:
        return False

    # Python 2 only, so keep these imports out of module scope.
    import tokenize as T
    from StringIO import StringIO
    try:
        tokens = list(T.generate_tokens(StringIO(x).readline))
    except (T.TokenError, IndentationError):
        # The tokenizer raises instead of yielding tokens for some
        # malformed input: TokenError for an unterminated construct,
        # IndentationError for a dedent that matches no outer level
        # (e.g. "a\n  b\n c"). Neither can be an identifier.
        return False
    return len(tokens) == 2 and tokens[0][0] == T.NAME
not names: - names = [ast.alias(name=ast_str(module), asname=None)] + names = [ast.alias(name=ast_str(module, piecewise=True), asname=None)] - ast_module = ast_str(module) + ast_module = ast_str(module, piecewise=True) module = ast_module.lstrip(".") level = len(ast_module) - len(module) if not module: module = None - ret = importer(expr, - module=module, - names=names, - level=level) - return Result() + ret + return Result() + importer( + expr, module=module, names=names, level=level) expr.pop(0) # index rimports = Result() @@ -1196,7 +1186,7 @@ class HyASTCompiler(object): "garbage after aliased import") iexpr.pop(0) # :as alias = iexpr.pop(0) - names = [ast.alias(name=ast_str(module), + names = [ast.alias(name=ast_str(module, piecewise=True), asname=ast_str(alias))] rimports += _compile_import(expr, ast_str(module), names) continue @@ -1210,7 +1200,7 @@ class HyASTCompiler(object): alias = ast_str(entry.pop(0)) else: alias = None - names.append(ast.alias(name=ast_str(sym), + names.append(ast.alias(name=(str(sym) if sym == "*" else ast_str(sym)), asname=alias)) rimports += _compile_import(expr, module, @@ -2199,7 +2189,7 @@ class HyASTCompiler(object): ctx=ast.Load()) if ast_str(symbol) in _stdlib: - self.imports[_stdlib[ast_str(symbol)]].add(symbol) + self.imports[_stdlib[ast_str(symbol)]].add(ast_str(symbol)) return asty.Name(symbol, id=ast_str(symbol), ctx=ast.Load()) diff --git a/hy/core/language.hy b/hy/core/language.hy index ad9835c..366d137 100644 --- a/hy/core/language.hy +++ b/hy/core/language.hy @@ -488,7 +488,7 @@ Even objects with the __name__ magic will work." False (or a b))) -(setv *exports* +(setv EXPORTS '[*map accumulate butlast calling-module-name chain coll? combinations comp complement compress cons cons? constantly count cycle dec distinct disassemble drop drop-last drop-while empty? eval even? every? 
exec first diff --git a/hy/core/shadow.hy b/hy/core/shadow.hy index 7471edb..65acb46 100644 --- a/hy/core/shadow.hy +++ b/hy/core/shadow.hy @@ -163,7 +163,7 @@ (setv coll (get coll k))) coll) -(setv *exports* [ +(setv EXPORTS [ '+ '- '* '** '/ '// '% '@ '<< '>> '& '| '^ '~ '< '> '<= '>= '= '!= @@ -171,4 +171,4 @@ 'is 'is-not 'in 'not-in 'get]) (if (not PY35) - (.remove *exports* '@)) + (.remove EXPORTS '@)) diff --git a/hy/extra/reserved.hy b/hy/extra/reserved.hy index c245224..0cd1fb5 100644 --- a/hy/extra/reserved.hy +++ b/hy/extra/reserved.hy @@ -15,8 +15,8 @@ (if (is _cache None) (do (setv unmangle (. sys.modules ["hy.lex.parser"] hy_symbol_unmangle)) (setv _cache (frozenset (map unmangle (+ - hy.core.language.*exports* - hy.core.shadow.*exports* + hy.core.language.EXPORTS + hy.core.shadow.EXPORTS (list (.keys (get hy.macros._hy_macros None))) keyword.kwlist (list-comp k [k (.keys hy.compiler.-compile-table)] diff --git a/hy/lex/parser.py b/hy/lex/parser.py index e02ec2f..82cbd85 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- # Copyright 2018 the authors. # This file is part of Hy, which is free software licensed under the Expat # license. See the LICENSE. 
# Delimiter wrapped around each escaped character in a mangled name.
mangle_delim = 'Δ' if PY3 else 'X'


def hy_symbol_mangle(s):
    """Convert the Hy symbol name `s` into a valid Python identifier.

    Hyphens become underscores and a trailing "?" becomes a leading
    "is_". If the result is still not a legal identifier, it is prefixed
    with "hyx_" and every character that is `mangle_delim` or not legal
    inside an identifier is replaced by its lowercased Unicode name
    (spaces as "_", hyphens as "H"), or by "U" plus its hexadecimal code
    point when it has no name, wrapped in `mangle_delim`. Leading
    underscores are set aside first and put back in front of the result.

    `s` must be non-empty.
    """
    assert s

    s = s.replace("-", "_")
    s2 = s.lstrip('_')
    leading_underscores = '_' * (len(s) - len(s2))
    s = s2

    if s.endswith("?"):
        s = 'is_' + s[:-1]
    if not isidentifier(leading_underscores + s):
        # Replace illegal characters with their Unicode character
        # names, or hexadecimal if they don't have one.
        s = 'hyx_' + ''.join(
            c
            if c != mangle_delim and isidentifier('S' + c)
            # We prepend the "S" because some characters aren't
            # allowed at the start of an identifier.
            else '{0}{1}{0}'.format(
                mangle_delim,
                unicodedata.name(c, '').lower()
                .replace('-', 'H').replace(' ', '_')
                or 'U{:x}'.format(ord(c)))
            for c in s)

    s = leading_underscores + s
    assert isidentifier(s)
    return s


def _unmangle_escape(mo):
    """Decode one delimited escape matched by `hy_symbol_unmangle`.

    Group 1 is the optional "U" marking a hexadecimal code point and
    group 2 is the payload. The matched text is returned unchanged when
    it does not decode to a character, so unmangling stays best-effort
    on names that merely look mangled.
    """
    try:
        if mo.group(1):
            # Python 2's `chr` only covers 0-255 and returns a byte
            # string, so `unichr` is needed there. The name is only
            # looked up when PY3 is false.
            to_char = chr if PY3 else unichr
            return to_char(int(mo.group(2), base=16))
        return unicodedata.lookup(
            mo.group(2).replace('_', ' ').replace('H', '-').upper())
    except (KeyError, ValueError, OverflowError):
        # KeyError: not a known character name. ValueError or
        # OverflowError: payload is not hex, or the code point is out
        # of range for this interpreter.
        return mo.group(0)


def hy_symbol_unmangle(s):
    """Stringify `s` and turn it back into a Hy-style symbol name.

    Undoes `hy_symbol_mangle` as far as possible: "hyx_" escapes are
    decoded, a leading "is_" becomes a trailing "?", and underscores
    (including leading ones) become hyphens.
    """
    # hy_symbol_mangle is one-way, so this won't round-trip.
    s = str_type(s)

    s2 = s.lstrip('_')
    leading_underscores = len(s) - len(s2)
    s = s2

    if s.startswith('hyx_'):
        s = re.sub(
            '{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim),
            _unmangle_escape,
            s[len('hyx_'):])
    if s.startswith('is_'):
        s = s[len("is_"):] + "?"
    s = s.replace('_', '-')

    return '-' * leading_underscores + s
test threading macro" @@ -1112,27 +1105,6 @@ (assert (= ((fn [] (-> 2 (+ 1 1) (* 1 2)))) 8))) -(defn test-symbol-utf-8 [] - "NATIVE: test symbol encoded" - (setv ♥ "love" - ⚘ "flower") - (assert (= (+ ⚘ ♥) "flowerlove"))) - - -(defn test-symbol-dash [] - "NATIVE: test symbol encoded" - (setv ♥-♥ "doublelove" - -_- "what?") - (assert (= ♥-♥ "doublelove")) - (assert (= -_- "what?"))) - - -(defn test-symbol-question-mark [] - "NATIVE: test foo? -> is_foo behavior" - (setv foo? "nachos") - (assert (= is_foo "nachos"))) - - (defn test-and [] "NATIVE: test the and function" @@ -1816,4 +1788,4 @@ macros() (defn test-relative-import [] "Make sure relative imports work properly" (import [..resources [tlib]]) - (assert (= tlib.*secret-message* "Hello World"))) + (assert (= tlib.SECRET-MESSAGE "Hello World"))) diff --git a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy new file mode 100644 index 0000000..cb539e3 --- /dev/null +++ b/tests/native_tests/mangling.hy @@ -0,0 +1,127 @@ +;; Copyright 2018 the authors. +;; This file is part of Hy, which is free software licensed under the Expat +;; license. See the LICENSE. + + +(import [hy._compat [PY3]]) + + +(defn test-hyphen [] + (setv a-b 1) + (assert (= a-b 1)) + (assert (= a_b 1)) + (setv -a-_b- 2) + (assert (= -a-_b- 2)) + (assert (= -a--b- 2)) + (assert (= -a__b- 2)) + (setv -_- 3) + (assert (= -_- 3)) + (assert (= --- 3)) + (assert (= ___ 3))) + + +(defn test-underscore-number [] + (setv _42 3) + (assert (= _42 3)) + (assert (!= _42 -42)) + (assert (not (in "_hyx_42" (locals))))) + + +(defn test-question-mark [] + (setv foo? "nachos") + (assert (= foo? "nachos")) + (assert (= is_foo "nachos")) + (setv ___ab_cd? "tacos") + (assert (= ___ab_cd? 
"tacos")) + (assert (= ___is_ab_cd "tacos"))) + + +(defn test-py-forbidden-ascii [] + + (setv # "no comment") + (assert (= # "no comment")) + (if PY3 + (assert (= hyx_Δnumber_signΔ "no comment")) + (assert (= hyx_Xnumber_signX "no comment"))) + + (setv $ "dosh") + (assert (= $ "dosh")) + (if PY3 + (assert (= hyx_Δdollar_signΔ "dosh")) + (assert (= hyx_Xdollar_signX "dosh")))) + + +(defn test-basic-multilingual-plane [] + (setv ♥ "love" + ⚘ab "flower") + (assert (= (+ ⚘ab ♥) "flowerlove")) + (if PY3 + (assert (= (+ hyx_ΔflowerΔab hyx_Δblack_heart_suitΔ) "flowerlove")) + (assert (= (+ hyx_XflowerXab hyx_Xblack_heart_suitX) "flowerlove"))) + (setv ⚘-⚘ "doubleflower") + (assert (= ⚘-⚘ "doubleflower")) + (if PY3 + (assert (= hyx_ΔflowerΔ_ΔflowerΔ "doubleflower")) + (assert (= hyx_XflowerX_XflowerX "doubleflower"))) + (setv ⚘? "mystery") + (assert (= ⚘? "mystery")) + (if PY3 + (assert (= hyx_is_ΔflowerΔ "mystery")) + (assert (= hyx_is_XflowerX "mystery")))) + + +(defn test-higher-unicode [] + (setv 😂 "emoji") + (assert (= 😂 "emoji")) + (if PY3 + (assert (= hyx_Δface_with_tears_of_joyΔ "emoji")) + (assert (= hyx_XU1f602X "emoji")))) + + +(defn test-nameless-unicode [] + (setv  "private use") + (assert (=  "private use")) + (if PY3 + (assert (= hyx_ΔUe000Δ "private use")) + (assert (= hyx_XUe000X "private use")))) + + +(defn test-charname-with-hyphen [] + (setv a