Overhaul mangling rules

This commit is contained in:
Kodi Arfer 2018-02-27 11:53:23 -08:00
parent d501d4d806
commit 52edad28e2
12 changed files with 221 additions and 113 deletions

View File

@ -35,3 +35,17 @@ if PY3:
else: else:
def raise_empty(t, *args): def raise_empty(t, *args):
raise t(*args) raise t(*args)
def isidentifier(x):
    """Report whether the string `x` is usable as a Python identifier.

    When `PY3` is true the answer comes straight from
    ``str.isidentifier``. Otherwise the string is tokenized and accepted
    only if that produces exactly two tokens, the first being a NAME.
    """
    if PY3:
        return x.isidentifier()

    # Trailing whitespace is rejected up front (presumably because the
    # tokenizer would otherwise ignore it -- confirm).
    if x != x.rstrip():
        return False

    import tokenize as T
    from StringIO import StringIO

    try:
        toks = list(T.generate_tokens(StringIO(x).readline))
    except T.TokenError:
        # Input the tokenizer cannot finish reading is not an identifier.
        return False

    if len(toks) != 2:
        return False
    return toks[0][0] == T.NAME

View File

@ -63,12 +63,12 @@ class HyREPL(code.InteractiveConsole):
elif callable(output_fn): elif callable(output_fn):
self.output_fn = output_fn self.output_fn = output_fn
else: else:
f = hy_symbol_mangle(output_fn)
if "." in output_fn: if "." in output_fn:
module, f = f.rsplit(".", 1) parts = [hy_symbol_mangle(x) for x in output_fn.split(".")]
module, f = '.'.join(parts[:-1]), parts[-1]
self.output_fn = getattr(importlib.import_module(module), f) self.output_fn = getattr(importlib.import_module(module), f)
else: else:
self.output_fn = __builtins__[f] self.output_fn = __builtins__[hy_symbol_mangle(output_fn)]
code.InteractiveConsole.__init__(self, locals=locals, code.InteractiveConsole.__init__(self, locals=locals,
filename=filename) filename=filename)

View File

@ -81,18 +81,11 @@ if PY35:
_decoratables += (ast.AsyncFunctionDef,) _decoratables += (ast.AsyncFunctionDef,)
def ast_str(foobar): def ast_str(x, piecewise=False):
if PY3: if piecewise:
return hy_symbol_mangle(str(foobar)) return ".".join(ast_str(s) if s else "" for s in x.split("."))
x = hy_symbol_mangle(str_type(x))
try: return x if PY3 else x.encode('UTF8')
return str(hy_symbol_mangle(str(foobar)))
except UnicodeEncodeError:
pass
enc = codecs.getencoder('punycode')
foobar, _ = enc(foobar)
return "hy_%s" % str(hy_symbol_mangle(foobar))
def builds(*types, **kwargs): def builds(*types, **kwargs):
@ -1156,19 +1149,16 @@ class HyASTCompiler(object):
expr = copy.deepcopy(expr) expr = copy.deepcopy(expr)
def _compile_import(expr, module, names=None, importer=asty.Import): def _compile_import(expr, module, names=None, importer=asty.Import):
if not names: if not names:
names = [ast.alias(name=ast_str(module), asname=None)] names = [ast.alias(name=ast_str(module, piecewise=True), asname=None)]
ast_module = ast_str(module) ast_module = ast_str(module, piecewise=True)
module = ast_module.lstrip(".") module = ast_module.lstrip(".")
level = len(ast_module) - len(module) level = len(ast_module) - len(module)
if not module: if not module:
module = None module = None
ret = importer(expr, return Result() + importer(
module=module, expr, module=module, names=names, level=level)
names=names,
level=level)
return Result() + ret
expr.pop(0) # index expr.pop(0) # index
rimports = Result() rimports = Result()
@ -1196,7 +1186,7 @@ class HyASTCompiler(object):
"garbage after aliased import") "garbage after aliased import")
iexpr.pop(0) # :as iexpr.pop(0) # :as
alias = iexpr.pop(0) alias = iexpr.pop(0)
names = [ast.alias(name=ast_str(module), names = [ast.alias(name=ast_str(module, piecewise=True),
asname=ast_str(alias))] asname=ast_str(alias))]
rimports += _compile_import(expr, ast_str(module), names) rimports += _compile_import(expr, ast_str(module), names)
continue continue
@ -1210,7 +1200,7 @@ class HyASTCompiler(object):
alias = ast_str(entry.pop(0)) alias = ast_str(entry.pop(0))
else: else:
alias = None alias = None
names.append(ast.alias(name=ast_str(sym), names.append(ast.alias(name=(str(sym) if sym == "*" else ast_str(sym)),
asname=alias)) asname=alias))
rimports += _compile_import(expr, module, rimports += _compile_import(expr, module,
@ -2199,7 +2189,7 @@ class HyASTCompiler(object):
ctx=ast.Load()) ctx=ast.Load())
if ast_str(symbol) in _stdlib: if ast_str(symbol) in _stdlib:
self.imports[_stdlib[ast_str(symbol)]].add(symbol) self.imports[_stdlib[ast_str(symbol)]].add(ast_str(symbol))
return asty.Name(symbol, id=ast_str(symbol), ctx=ast.Load()) return asty.Name(symbol, id=ast_str(symbol), ctx=ast.Load())

View File

@ -488,7 +488,7 @@ Even objects with the __name__ magic will work."
False False
(or a b))) (or a b)))
(setv *exports* (setv EXPORTS
'[*map accumulate butlast calling-module-name chain coll? combinations '[*map accumulate butlast calling-module-name chain coll? combinations
comp complement compress cons cons? constantly count cycle dec distinct comp complement compress cons cons? constantly count cycle dec distinct
disassemble drop drop-last drop-while empty? eval even? every? exec first disassemble drop drop-last drop-while empty? eval even? every? exec first

View File

@ -163,7 +163,7 @@
(setv coll (get coll k))) (setv coll (get coll k)))
coll) coll)
(setv *exports* [ (setv EXPORTS [
'+ '- '* '** '/ '// '% '@ '+ '- '* '** '/ '// '% '@
'<< '>> '& '| '^ '~ '<< '>> '& '| '^ '~
'< '> '<= '>= '= '!= '< '> '<= '>= '= '!=
@ -171,4 +171,4 @@
'is 'is-not 'in 'not-in 'is 'is-not 'in 'not-in
'get]) 'get])
(if (not PY35) (if (not PY35)
(.remove *exports* '@)) (.remove EXPORTS '@))

View File

@ -15,8 +15,8 @@
(if (is _cache None) (do (if (is _cache None) (do
(setv unmangle (. sys.modules ["hy.lex.parser"] hy_symbol_unmangle)) (setv unmangle (. sys.modules ["hy.lex.parser"] hy_symbol_unmangle))
(setv _cache (frozenset (map unmangle (+ (setv _cache (frozenset (map unmangle (+
hy.core.language.*exports* hy.core.language.EXPORTS
hy.core.shadow.*exports* hy.core.shadow.EXPORTS
(list (.keys (get hy.macros._hy_macros None))) (list (.keys (get hy.macros._hy_macros None)))
keyword.kwlist keyword.kwlist
(list-comp k [k (.keys hy.compiler.-compile-table)] (list-comp k [k (.keys hy.compiler.-compile-table)]

View File

@ -1,3 +1,4 @@
# -*- encoding: utf-8 -*-
# Copyright 2018 the authors. # Copyright 2018 the authors.
# This file is part of Hy, which is free software licensed under the Expat # This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE. # license. See the LICENSE.
@ -5,10 +6,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from functools import wraps from functools import wraps
import string, re, unicodedata
from rply import ParserGenerator from rply import ParserGenerator
from hy._compat import str_type from hy._compat import PY3, str_type, isidentifier
from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression, from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression,
HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString, HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString,
HySymbol) HySymbol)
@ -21,43 +23,57 @@ pg = ParserGenerator(
cache_id="hy_parser" cache_id="hy_parser"
) )
mangle_delim = 'Δ' if PY3 else 'X'
def hy_symbol_mangle(p): def hy_symbol_mangle(s):
if p.startswith("*") and p.endswith("*") and p not in ("*", "**"): assert s
p = p[1:-1].upper()
if "-" in p and p != "-": s = s.replace("-", "_")
p = p.replace("-", "_") s2 = s.lstrip('_')
leading_underscores = '_' * (len(s) - len(s2))
s = s2
if p.endswith("?") and p != "?": if s.endswith("?"):
p = "is_%s" % (p[:-1]) s = 'is_' + s[:-1]
if not isidentifier(leading_underscores + s):
# Replace illegal characters with their Unicode character
# names, or hexadecimal if they don't have one.
s = 'hyx_' + ''.join(
c
if c != mangle_delim and isidentifier('S' + c)
# We prepend the "S" because some characters aren't
# allowed at the start of an identifier.
else '{0}{1}{0}'.format(mangle_delim,
unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
or 'U{:x}'.format(ord(c)))
for c in s)
if p.endswith("!") and p != "!": s = leading_underscores + s
p = "%s_bang" % (p[:-1]) assert isidentifier(s)
return s
return p
def hy_symbol_unmangle(p): def hy_symbol_unmangle(s):
# hy_symbol_mangle is one-way, so this can't be perfect. # hy_symbol_mangle is one-way, so this won't round-trip.
# But it can be useful till we have a way to get the original s = str_type(s)
# symbol (https://github.com/hylang/hy/issues/360).
p = str_type(p)
if p.endswith("_bang") and p != "_bang": s2 = s.lstrip('_')
p = p[:-len("_bang")] + "!" leading_underscores = len(s) - len(s2)
s = s2
if p.startswith("is_") and p != "is_": if s.startswith('hyx_'):
p = p[len("is_"):] + "?" s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim),
lambda mo:
chr(int(mo.group(2), base=16))
if mo.group(1)
else unicodedata.lookup(
mo.group(2).replace('_', ' ').replace('H', '-').upper()),
s[len('hyx_'):])
if s.startswith('is_'):
s = s[len("is_"):] + "?"
s = s.replace('_', '-')
if "_" in p and p != "_": return '-' * leading_underscores + s
p = p.replace("_", "-")
if (all([c.isalpha() and c.isupper() or c == '_' for c in p]) and
any([c.isalpha() for c in p])):
p = '*' + p.lower() + '*'
return p
def set_boundaries(fun): def set_boundaries(fun):

View File

@ -2,8 +2,11 @@
# This file is part of Hy, which is free software licensed under the Expat # This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE. # license. See the LICENSE.
from hy._compat import PY3
import hy.inspect import hy.inspect
from hy.models import replace_hy_obj, HyExpression, HySymbol from hy.models import replace_hy_obj, HyExpression, HySymbol
from hy.lex.parser import hy_symbol_mangle
from hy.errors import HyTypeError, HyMacroExpansionError from hy.errors import HyTypeError, HyMacroExpansionError
from collections import defaultdict from collections import defaultdict
@ -62,11 +65,14 @@ def tag(name):
""" """
def _(fn): def _(fn):
fn.__name__ = '#{}'.format(name) _name = hy_symbol_mangle('#{}'.format(name))
if not PY3:
_name = _name.encode('UTF-8')
fn.__name__ = _name
module_name = fn.__module__ module_name = fn.__module__
if module_name.startswith("hy.core"): if module_name.startswith("hy.core"):
module_name = None module_name = None
_hy_tag[module_name][name] = fn _hy_tag[module_name][hy_symbol_mangle(name)] = fn
return fn return fn
return _ return _

View File

@ -738,13 +738,6 @@
(assert (= x 2))) (assert (= x 2)))
;; A symbol wrapped in asterisks names the same variable as its
;; upper-cased inner part (*foo* and FOO), and is distinct from the
;; plain lower-case symbol foo.
(defn test-earmuffs []
"NATIVE: Test earmuffs"
(setv *foo* "2")
(setv foo "3")
(assert (= *foo* FOO))
(assert (!= *foo* foo)))
(defn test-threading [] (defn test-threading []
"NATIVE: test threading macro" "NATIVE: test threading macro"
@ -1112,27 +1105,6 @@
(assert (= ((fn [] (-> 2 (+ 1 1) (* 1 2)))) 8))) (assert (= ((fn [] (-> 2 (+ 1 1) (* 1 2)))) 8)))
;; Non-ASCII symbols can be bound and read back like any other name.
;; The heart and flower symbols had been lost from this text, leaving
;; `setv` with string targets and `+` with no operands.
(defn test-symbol-utf-8 []
  "NATIVE: test symbol encoded"
  (setv ♥ "love"
        ⚘ "flower")
  (assert (= (+ ⚘ ♥) "flowerlove")))
;; A hyphen between non-ASCII characters, and a symbol made only of
;; hyphens and underscores, both work as variable names. The heart
;; characters around the hyphen had been lost from this text.
(defn test-symbol-dash []
  "NATIVE: test symbol encoded"
  (setv ♥-♥ "doublelove"
        -_- "what?")
  (assert (= ♥-♥ "doublelove"))
  (assert (= -_- "what?")))
;; Binding foo? must make the value reachable under the name is_foo.
(defn test-symbol-question-mark []
"NATIVE: test foo? -> is_foo behavior"
(setv foo? "nachos")
(assert (= is_foo "nachos")))
(defn test-and [] (defn test-and []
"NATIVE: test the and function" "NATIVE: test the and function"
@ -1816,4 +1788,4 @@ macros()
(defn test-relative-import [] (defn test-relative-import []
"Make sure relative imports work properly" "Make sure relative imports work properly"
(import [..resources [tlib]]) (import [..resources [tlib]])
(assert (= tlib.*secret-message* "Hello World"))) (assert (= tlib.SECRET-MESSAGE "Hello World")))

View File

@ -0,0 +1,127 @@
;; Copyright 2018 the authors.
;; This file is part of Hy, which is free software licensed under the Expat
;; license. See the LICENSE.
(import [hy._compat [PY3]])
;; Hyphens and underscores are interchangeable in symbols: each group
;; of spellings below must read back the same variable.
(defn test-hyphen []
(setv a-b 1)
(assert (= a-b 1))
(assert (= a_b 1))
;; Leading, interior, and trailing hyphens/underscores mixed together.
(setv -a-_b- 2)
(assert (= -a-_b- 2))
(assert (= -a--b- 2))
(assert (= -a__b- 2))
;; A symbol consisting of nothing but hyphens and underscores.
(setv -_- 3)
(assert (= -_- 3))
(assert (= --- 3))
(assert (= ___ 3)))
;; An underscore followed by digits is kept as the plain name _42; no
;; "_hyx_42" entry may appear among the locals.
(defn test-underscore-number []
(setv _42 3)
(assert (= _42 3))
;; -42 must not resolve to the _42 variable (presumably it reads as a
;; number -- confirm).
(assert (!= _42 -42))
(assert (not (in "_hyx_42" (locals)))))
;; A trailing "?" becomes an "is_" prefix on the mangled name.
(defn test-question-mark []
(setv foo? "nachos")
(assert (= foo? "nachos"))
(assert (= is_foo "nachos"))
;; With leading underscores, "is_" is inserted after them rather than
;; at the very start of the name.
(setv ___ab_cd? "tacos")
(assert (= ___ab_cd? "tacos"))
(assert (= ___is_ab_cd "tacos")))
;; ASCII characters that are not legal in a Python identifier are
;; spelled out as "hyx_" plus the lower-cased Unicode character name
;; between delimiters: "Δ" when PY3 is true, "X" otherwise.
(defn test-py-forbidden-ascii []
(setv # "no comment")
(assert (= # "no comment"))
(if PY3
(assert (= hyx_Δnumber_signΔ "no comment"))
(assert (= hyx_Xnumber_signX "no comment")))
(setv $ "dosh")
(assert (= $ "dosh"))
(if PY3
(assert (= hyx_Δdollar_signΔ "dosh"))
(assert (= hyx_Xdollar_signX "dosh"))))
;; Symbols containing Basic Multilingual Plane characters that are not
;; identifier characters mangle to "hyx_" names, with each such
;; character replaced by its delimited Unicode name ("Δ" delimiters
;; when PY3 is true, "X" otherwise).
;; The symbols used are U+2665 BLACK HEART SUIT and U+2698 FLOWER;
;; they had been lost from this text and are restored here to match
;; the mangled names asserted below.
(defn test-basic-multilingual-plane []
  (setv ♥ "love"
        ⚘ab "flower")
  (assert (= (+ ⚘ab ♥) "flowerlove"))
  (if PY3
    (assert (= (+ hyx_ΔflowerΔab hyx_Δblack_heart_suitΔ) "flowerlove"))
    (assert (= (+ hyx_XflowerXab hyx_Xblack_heart_suitX) "flowerlove")))
  ;; A hyphen between two mangled characters still becomes "_".
  (setv ⚘-⚘ "doubleflower")
  (assert (= ⚘-⚘ "doubleflower"))
  (if PY3
    (assert (= hyx_ΔflowerΔ_ΔflowerΔ "doubleflower"))
    (assert (= hyx_XflowerX_XflowerX "doubleflower")))
  ;; The trailing "?" turns into "is_" placed after the "hyx_" prefix.
  (setv ⚘? "mystery")
  (assert (= ⚘? "mystery"))
  (if PY3
    (assert (= hyx_is_ΔflowerΔ "mystery"))
    (assert (= hyx_is_XflowerX "mystery"))))
;; A character beyond the Basic Multilingual Plane: the PY3 branch
;; expects its Unicode name, the other branch expects "U" followed by
;; the hexadecimal code point (1f602).
(defn test-higher-unicode []
(setv 😂 "emoji")
(assert (= 😂 "emoji"))
(if PY3
(assert (= hyx_Δface_with_tears_of_joyΔ "emoji"))
(assert (= hyx_XU1f602X "emoji"))))
;; A character with no Unicode name is mangled as "U" followed by its
;; hexadecimal code point between delimiters.
;; The symbol is the single private-use character U+E000; it had been
;; lost from this text and is restored to match the asserted "Ue000".
(defn test-nameless-unicode []
  (setv  "private use")
  (assert (=  "private use"))
  (if PY3
    (assert (= hyx_ΔUe000Δ "private use"))
    (assert (= hyx_XUe000X "private use"))))
;; The Unicode name of "<" contains a hyphen ("less-than sign"); in the
;; mangled name that hyphen appears as "H" and the space as "_".
(defn test-charname-with-hyphen []
(setv a<b "little")
(assert (= a<b "little"))
(if PY3
(assert (= hyx_aΔlessHthan_signΔb "little"))
(assert (= hyx_aXlessHthan_signXb "little"))))
;; The delimiter character itself must be escaped when it occurs in a
;; symbol that needs mangling: "Δ" is spelled out in both branches,
;; while "X" is spelled out only in the branch where it is the
;; delimiter (PY3 false).
;; The symbols are "Δ" + U+2708 AIRPLANE and "X" + U+2620 SKULL AND
;; CROSSBONES; the second character of each had been lost from this
;; text and is restored to match the names asserted below.
(defn test-delimiters []
  (setv Δ✈ "Delta Air Lines")
  (assert (= Δ✈ "Delta Air Lines"))
  (if PY3
    (assert (= hyx_Δgreek_capital_letter_deltaΔΔairplaneΔ "Delta Air Lines"))
    (assert (= hyx_Xgreek_capital_letter_deltaXXairplaneX "Delta Air Lines")))
  (setv X☠ "treasure")
  (if PY3
    (assert (= hyx_XΔskull_and_crossbonesΔ "treasure"))
    (assert (= hyx_Xlatin_capital_letter_xXXskull_and_crossbonesX "treasure"))))
;; Tag macro whose name contains hyphens; it expands to a two-element
;; list holding the tagged form twice.
(deftag tm---x [form]
[form form])
;; The tag macro must be reachable under both the hyphenated and the
;; underscored spelling of its name.
(defn test-tag-macro []
(setv x "")
(assert (= #tm---x (do (+= x "a") 1) [1 1]))
(assert (= #tm___x (do (+= x "b") 2) [2 2]))
;; Each use duplicates its form, so every append runs twice.
(assert (= x "aabb")))
(defn test-late-mangling []
; Mangling should only happen during compilation.
; Quoted symbols keep the spelling they were written with, so foo? and
; is_foo are different values at this stage.
(assert (!= 'foo? 'is_foo))
(setv sym 'foo?)
(assert (= sym "foo?"))
(assert (!= sym "is_foo"))
; Once the quasiquoted code is evaluated, both spellings read the
; variable that was assigned through the unquoted symbol.
(setv out (eval `(do
(setv ~sym 10)
[foo? is_foo])))
(assert (= out [10 10])))

View File

@ -3,6 +3,7 @@
;; license. See the LICENSE. ;; license. See the LICENSE.
(import [hy.errors [HyTypeError]]) (import [hy.errors [HyTypeError]])
(import [hy.lex.parser [hy-symbol-mangle]])
(defmacro rev [&rest body] (defmacro rev [&rest body]
"Execute the `body` statements in reverse" "Execute the `body` statements in reverse"
@ -162,9 +163,9 @@
(setv _ast2 (import_buffer_to_ast macro1 "foo")) (setv _ast2 (import_buffer_to_ast macro1 "foo"))
(setv s1 (to_source _ast1)) (setv s1 (to_source _ast1))
(setv s2 (to_source _ast2)) (setv s2 (to_source _ast2))
;; and make sure there is something new that starts with :G_ ;; and make sure there is something new that starts with _;G|
(assert (in "_;G|" s1)) (assert (in (hy-symbol-mangle "_;G|") s1))
(assert (in "_;G|" s2)) (assert (in (hy-symbol-mangle "_;G|") s2))
;; but make sure the two don't match each other ;; but make sure the two don't match each other
(assert (not (= s1 s2)))) (assert (not (= s1 s2))))
@ -188,8 +189,8 @@
(setv _ast2 (import_buffer_to_ast macro1 "foo")) (setv _ast2 (import_buffer_to_ast macro1 "foo"))
(setv s1 (to_source _ast1)) (setv s1 (to_source _ast1))
(setv s2 (to_source _ast2)) (setv s2 (to_source _ast2))
(assert (in "_;a|" s1)) (assert (in (hy-symbol-mangle "_;a|") s1))
(assert (in "_;a|" s2)) (assert (in (hy-symbol-mangle "_;a|") s2))
(assert (not (= s1 s2)))) (assert (not (= s1 s2))))
(defn test-defmacro-g! [] (defn test-defmacro-g! []

View File

@ -343,24 +343,6 @@ def test_lex_comment_382():
assert entry == [HySymbol("foo")] assert entry == [HySymbol("foo")]
def test_unmangle():
    """Check `hy_symbol_unmangle` against a table of known spellings."""
    import sys

    unmangle = sys.modules["hy.lex.parser"].hy_symbol_unmangle

    # (mangled input, expected unmangled symbol), checked in order.
    expectations = (
        ("FOO", "*foo*"),
        ("<", "<"),
        ("FOOa", "FOOa"),
        ("foo_bar", "foo-bar"),
        ("_", "_"),
        ("is_foo", "foo?"),
        ("is_", "is-"),
        ("foo_bang", "foo!"),
        ("_bang", "-bang"),
    )
    for mangled, wanted in expectations:
        assert unmangle(mangled) == wanted
def test_simple_cons(): def test_simple_cons():
"""Check that cons gets tokenized correctly""" """Check that cons gets tokenized correctly"""
entry = tokenize("(a . b)")[0] entry = tokenize("(a . b)")[0]