Overhaul mangling rules

This commit is contained in:
Kodi Arfer 2018-02-27 11:53:23 -08:00
parent d501d4d806
commit 52edad28e2
12 changed files with 221 additions and 113 deletions

View File

@ -35,3 +35,17 @@ if PY3:
else:
def raise_empty(t, *args):
raise t(*args)
def isidentifier(x):
    """Return whether the string ``x`` is a Python identifier.

    On Python 3 this defers to ``str.isidentifier``. Otherwise ``x`` is
    rejected if it has trailing whitespace or cannot be tokenized, and
    accepted only when tokenizing yields exactly two tokens, the first
    of which is a NAME.
    """
    if PY3:
        return x.isidentifier()

    # Python 2 has no str.isidentifier, so ask the tokenizer instead.
    if x != x.rstrip():
        return False
    import tokenize
    from StringIO import StringIO
    try:
        toks = list(tokenize.generate_tokens(StringIO(x).readline))
    except tokenize.TokenError:
        return False
    if len(toks) != 2:
        return False
    return toks[0][0] == tokenize.NAME

View File

@ -63,12 +63,12 @@ class HyREPL(code.InteractiveConsole):
elif callable(output_fn):
self.output_fn = output_fn
else:
f = hy_symbol_mangle(output_fn)
if "." in output_fn:
module, f = f.rsplit(".", 1)
parts = [hy_symbol_mangle(x) for x in output_fn.split(".")]
module, f = '.'.join(parts[:-1]), parts[-1]
self.output_fn = getattr(importlib.import_module(module), f)
else:
self.output_fn = __builtins__[f]
self.output_fn = __builtins__[hy_symbol_mangle(output_fn)]
code.InteractiveConsole.__init__(self, locals=locals,
filename=filename)

View File

@ -81,18 +81,11 @@ if PY35:
_decoratables += (ast.AsyncFunctionDef,)
def ast_str(foobar):
    """Return the mangled form of ``foobar`` as a native ``str``.

    On Python 3 the mangled name is returned directly. Otherwise, if
    converting to ``str`` raises UnicodeEncodeError, the name is
    punycode-encoded first and the mangled result gets a "hy_" prefix.
    """
    if PY3:
        return hy_symbol_mangle(str(foobar))

    try:
        mangled = hy_symbol_mangle(str(foobar))
        return str(mangled)
    except UnicodeEncodeError:
        pass

    # Fallback: punycode the name, then mangle the encoded form.
    encode = codecs.getencoder('punycode')
    encoded, _ = encode(foobar)
    return "hy_%s" % str(hy_symbol_mangle(encoded))
def ast_str(x, piecewise=False):
    """Mangle ``x`` into a string suitable for use as a name in the AST.

    With ``piecewise`` true, ``x`` is split on "." and each non-empty
    piece is mangled on its own before the pieces are rejoined with
    "."; empty pieces stay empty. On Python 3 the mangled text is
    returned as is; otherwise it is encoded as UTF-8.
    """
    if not piecewise:
        mangled = hy_symbol_mangle(str_type(x))
        return mangled if PY3 else mangled.encode('UTF8')

    pieces = [ast_str(piece) if piece else "" for piece in x.split(".")]
    return ".".join(pieces)
def builds(*types, **kwargs):
@ -1156,19 +1149,16 @@ class HyASTCompiler(object):
expr = copy.deepcopy(expr)
# Build one import node via `importer` for `module`, optionally with an
# explicit list of aliases in `names`, and return it added to a Result.
# NOTE(review): this span is a rendered diff with the +/- markers and
# indentation stripped, so some statements appear twice (one form without
# and one with `piecewise=True`) — confirm which is current in the real file.
def _compile_import(expr, module, names=None, importer=asty.Import):
# With no explicit names, import the module itself under its mangled name.
if not names:
names = [ast.alias(name=ast_str(module), asname=None)]
names = [ast.alias(name=ast_str(module, piecewise=True), asname=None)]
ast_module = ast_str(module)
ast_module = ast_str(module, piecewise=True)
# Leading dots are stripped from the module name and their count is
# passed on as the import level.
module = ast_module.lstrip(".")
level = len(ast_module) - len(module)
# A path consisting only of dots leaves no module name.
if not module:
module = None
ret = importer(expr,
module=module,
names=names,
level=level)
return Result() + ret
return Result() + importer(
expr, module=module, names=names, level=level)
expr.pop(0) # index
rimports = Result()
@ -1196,7 +1186,7 @@ class HyASTCompiler(object):
"garbage after aliased import")
iexpr.pop(0) # :as
alias = iexpr.pop(0)
names = [ast.alias(name=ast_str(module),
names = [ast.alias(name=ast_str(module, piecewise=True),
asname=ast_str(alias))]
rimports += _compile_import(expr, ast_str(module), names)
continue
@ -1210,7 +1200,7 @@ class HyASTCompiler(object):
alias = ast_str(entry.pop(0))
else:
alias = None
names.append(ast.alias(name=ast_str(sym),
names.append(ast.alias(name=(str(sym) if sym == "*" else ast_str(sym)),
asname=alias))
rimports += _compile_import(expr, module,
@ -2199,7 +2189,7 @@ class HyASTCompiler(object):
ctx=ast.Load())
if ast_str(symbol) in _stdlib:
self.imports[_stdlib[ast_str(symbol)]].add(symbol)
self.imports[_stdlib[ast_str(symbol)]].add(ast_str(symbol))
return asty.Name(symbol, id=ast_str(symbol), ctx=ast.Load())

View File

@ -488,7 +488,7 @@ Even objects with the __name__ magic will work."
False
(or a b)))
(setv *exports*
(setv EXPORTS
'[*map accumulate butlast calling-module-name chain coll? combinations
comp complement compress cons cons? constantly count cycle dec distinct
disassemble drop drop-last drop-while empty? eval even? every? exec first

View File

@ -163,7 +163,7 @@
(setv coll (get coll k)))
coll)
(setv *exports* [
(setv EXPORTS [
'+ '- '* '** '/ '// '% '@
'<< '>> '& '| '^ '~
'< '> '<= '>= '= '!=
@ -171,4 +171,4 @@
'is 'is-not 'in 'not-in
'get])
(if (not PY35)
(.remove *exports* '@))
(.remove EXPORTS '@))

View File

@ -15,8 +15,8 @@
(if (is _cache None) (do
(setv unmangle (. sys.modules ["hy.lex.parser"] hy_symbol_unmangle))
(setv _cache (frozenset (map unmangle (+
hy.core.language.*exports*
hy.core.shadow.*exports*
hy.core.language.EXPORTS
hy.core.shadow.EXPORTS
(list (.keys (get hy.macros._hy_macros None)))
keyword.kwlist
(list-comp k [k (.keys hy.compiler.-compile-table)]

View File

@ -1,3 +1,4 @@
# -*- encoding: utf-8 -*-
# Copyright 2018 the authors.
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.
@ -5,10 +6,11 @@
from __future__ import unicode_literals
from functools import wraps
import string, re, unicodedata
from rply import ParserGenerator
from hy._compat import str_type
from hy._compat import PY3, str_type, isidentifier
from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression,
HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString,
HySymbol)
@ -21,43 +23,57 @@ pg = ParserGenerator(
cache_id="hy_parser"
)
mangle_delim = 'Δ' if PY3 else 'X'
# Convert a Hy symbol name into a string that is a valid Python identifier.
# NOTE(review): this span is a rendered diff with the +/- markers and
# indentation stripped. The statements operating on `p` appear to be the
# removed implementation and those operating on `s` its replacement —
# confirm against the real file.
def hy_symbol_mangle(p):
# Earmuffs: *foo* becomes FOO ("*" and "**" themselves are left alone).
if p.startswith("*") and p.endswith("*") and p not in ("*", "**"):
p = p[1:-1].upper()
def hy_symbol_mangle(s):
# The empty string is not accepted.
assert s
if "-" in p and p != "-":
p = p.replace("-", "_")
# Every hyphen becomes an underscore.
s = s.replace("-", "_")
# Set the leading underscores aside so the rules below only see the rest
# of the name; they are re-attached just before returning.
s2 = s.lstrip('_')
leading_underscores = '_' * (len(s) - len(s2))
s = s2
if p.endswith("?") and p != "?":
p = "is_%s" % (p[:-1])
# A trailing "?" is rewritten as an "is_" prefix.
if s.endswith("?"):
s = 'is_' + s[:-1]
# The validity check includes the leading underscores: a name such as
# "_42" is a valid identifier even though "42" on its own is not.
if not isidentifier(leading_underscores + s):
# Replace illegal characters with their Unicode character
# names, or hexadecimal if they don't have one.
s = 'hyx_' + ''.join(
c
if c != mangle_delim and isidentifier('S' + c)
# We prepend the "S" because some characters aren't
# allowed at the start of an identifier.
else '{0}{1}{0}'.format(mangle_delim,
unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
or 'U{:x}'.format(ord(c)))
for c in s)
if p.endswith("!") and p != "!":
p = "%s_bang" % (p[:-1])
return p
# Re-attach the leading underscores that were set aside above.
s = leading_underscores + s
# Whatever path was taken, the result must be an identifier.
assert isidentifier(s)
return s
# Turn a mangled Python name back into a Hy-style symbol name.
# NOTE(review): this span is a rendered diff with the +/- markers and
# indentation stripped. The statements operating on `p` appear to be the
# removed implementation and those operating on `s` its replacement —
# confirm against the real file.
def hy_symbol_unmangle(p):
# hy_symbol_mangle is one-way, so this can't be perfect.
# But it can be useful till we have a way to get the original
# symbol (https://github.com/hylang/hy/issues/360).
p = str_type(p)
def hy_symbol_unmangle(s):
# hy_symbol_mangle is one-way, so this won't round-trip.
s = str_type(s)
if p.endswith("_bang") and p != "_bang":
p = p[:-len("_bang")] + "!"
# Count and strip the leading underscores; the same number of hyphens is
# put back in front of the result.
s2 = s.lstrip('_')
leading_underscores = len(s) - len(s2)
s = s2
if p.startswith("is_") and p != "is_":
p = p[len("is_"):] + "?"
# Undo the hyx_ encoding: each chunk between two delimiters is either "U"
# followed by a hexadecimal code point, or a Unicode character name with
# spaces written as "_" and hyphens written as "H".
# NOTE(review): on Python 2, chr() only accepts values below 256, so the
# "U<hex>" branch looks like it would fail there for larger code points —
# confirm whether unichr is needed.
if s.startswith('hyx_'):
s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim),
lambda mo:
chr(int(mo.group(2), base=16))
if mo.group(1)
else unicodedata.lookup(
mo.group(2).replace('_', ' ').replace('H', '-').upper()),
s[len('hyx_'):])
# An "is_" prefix turns back into a trailing "?".
if s.startswith('is_'):
s = s[len("is_"):] + "?"
# Every remaining underscore becomes a hyphen.
s = s.replace('_', '-')
if "_" in p and p != "_":
p = p.replace("_", "-")
if (all([c.isalpha() and c.isupper() or c == '_' for c in p]) and
any([c.isalpha() for c in p])):
p = '*' + p.lower() + '*'
return p
return '-' * leading_underscores + s
def set_boundaries(fun):

View File

@ -2,8 +2,11 @@
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.
from hy._compat import PY3
import hy.inspect
from hy.models import replace_hy_obj, HyExpression, HySymbol
from hy.lex.parser import hy_symbol_mangle
from hy.errors import HyTypeError, HyMacroExpansionError
from collections import defaultdict
@ -62,11 +65,14 @@ def tag(name):
"""
def _(fn):
fn.__name__ = '#{}'.format(name)
_name = hy_symbol_mangle('#{}'.format(name))
if not PY3:
_name = _name.encode('UTF-8')
fn.__name__ = _name
module_name = fn.__module__
if module_name.startswith("hy.core"):
module_name = None
_hy_tag[module_name][name] = fn
_hy_tag[module_name][hy_symbol_mangle(name)] = fn
return fn
return _

View File

@ -738,13 +738,6 @@
(assert (= x 2)))
;; Binds *foo* and foo to different values, then checks that *foo* is the
;; same variable as FOO and a different one from foo.
(defn test-earmuffs []
"NATIVE: Test earmuffs"
(setv *foo* "2")
(setv foo "3")
(assert (= *foo* FOO))
(assert (!= *foo* foo)))
(defn test-threading []
"NATIVE: test threading macro"
@ -1112,27 +1105,6 @@
(assert (= ((fn [] (-> 2 (+ 1 1) (* 1 2)))) 8)))
;; Non-ASCII symbols can be bound and read back like any other name.
;; The two symbols had been dropped from this listing, leaving `setv`
;; with a string literal as its target and `+` with no arguments.
(defn test-symbol-utf-8 []
  "NATIVE: test symbol encoded"
  (setv ♥ "love"
        ⚘ "flower")
  (assert (= (+ ⚘ ♥) "flowerlove")))
;; A hyphen between two non-ASCII characters, and a name made only of
;; hyphens and underscores, both work as ordinary symbols.
;; The heart characters around the first hyphen had been dropped from this
;; listing, which turned the binding into a rebinding of `-` itself.
(defn test-symbol-dash []
  "NATIVE: test symbol encoded"
  (setv ♥-♥ "doublelove"
        -_- "what?")
  (assert (= ♥-♥ "doublelove"))
  (assert (= -_- "what?")))
;; A value bound to foo? must be readable through the name is_foo.
(defn test-symbol-question-mark []
"NATIVE: test foo? -> is_foo behavior"
(setv foo? "nachos")
(assert (= is_foo "nachos")))
(defn test-and []
"NATIVE: test the and function"
@ -1816,4 +1788,4 @@ macros()
(defn test-relative-import []
"Make sure relative imports work properly"
(import [..resources [tlib]])
(assert (= tlib.*secret-message* "Hello World")))
(assert (= tlib.SECRET-MESSAGE "Hello World")))

View File

@ -0,0 +1,127 @@
;; Copyright 2018 the authors.
;; This file is part of Hy, which is free software licensed under the Expat
;; license. See the LICENSE.
(import [hy._compat [PY3]])
;; Hyphens and underscores are interchangeable in a symbol name, whether
;; they appear in the middle, at the start, at the end, or make up the
;; whole name.
(defn test-hyphen []
(setv a-b 1)
(assert (= a-b 1))
(assert (= a_b 1))
(setv -a-_b- 2)
(assert (= -a-_b- 2))
(assert (= -a--b- 2))
(assert (= -a__b- 2))
(setv -_- 3)
(assert (= -_- 3))
(assert (= --- 3))
(assert (= ___ 3)))
;; An underscore followed by digits is used as a variable name as is:
;; _42 is distinct from the number -42, and no "_hyx_42" entry shows up
;; in the local variables.
(defn test-underscore-number []
(setv _42 3)
(assert (= _42 3))
(assert (!= _42 -42))
(assert (not (in "_hyx_42" (locals)))))
;; A trailing "?" becomes an "is_" prefix. When the name starts with
;; underscores, the prefix goes after them (___ab_cd? -> ___is_ab_cd).
(defn test-question-mark []
(setv foo? "nachos")
(assert (= foo? "nachos"))
(assert (= is_foo "nachos"))
(setv ___ab_cd? "tacos")
(assert (= ___ab_cd? "tacos"))
(assert (= ___is_ab_cd "tacos")))
;; ASCII characters that cannot appear in a Python identifier are spelled
;; out by character name after a "hyx_" prefix. The name is wrapped in Δ
;; on Python 3 and in X otherwise.
(defn test-py-forbidden-ascii []
(setv # "no comment")
(assert (= # "no comment"))
(if PY3
(assert (= hyx_Δnumber_signΔ "no comment"))
(assert (= hyx_Xnumber_signX "no comment")))
(setv $ "dosh")
(assert (= $ "dosh"))
(if PY3
(assert (= hyx_Δdollar_signΔ "dosh"))
(assert (= hyx_Xdollar_signX "dosh"))))
;; Non-ASCII symbols from the Basic Multilingual Plane are mangled to their
;; lowercased character names, and still combine with the hyphen and "?"
;; rules.
;; The ♥ (black heart suit) and ⚘ (flower) characters had been dropped from
;; this listing; they are restored here to match the mangled names that
;; the assertions expect.
(defn test-basic-multilingual-plane []
  (setv ♥ "love"
        ⚘ab "flower")
  (assert (= (+ ⚘ab ♥) "flowerlove"))
  (if PY3
    (assert (= (+ hyx_ΔflowerΔab hyx_Δblack_heart_suitΔ) "flowerlove"))
    (assert (= (+ hyx_XflowerXab hyx_Xblack_heart_suitX) "flowerlove")))
  (setv ⚘-⚘ "doubleflower")
  (assert (= ⚘-⚘ "doubleflower"))
  (if PY3
    (assert (= hyx_ΔflowerΔ_ΔflowerΔ "doubleflower"))
    (assert (= hyx_XflowerX_XflowerX "doubleflower")))
  (setv ⚘? "mystery")
  (assert (= ⚘? "mystery"))
  (if PY3
    (assert (= hyx_is_ΔflowerΔ "mystery"))
    (assert (= hyx_is_XflowerX "mystery"))))
;; A character outside the Basic Multilingual Plane: the Python 3 branch
;; expects the character's name, the other branch expects the hexadecimal
;; form (U1f602).
(defn test-higher-unicode []
(setv 😂 "emoji")
(assert (= 😂 "emoji"))
(if PY3
(assert (= hyx_Δface_with_tears_of_joyΔ "emoji"))
(assert (= hyx_XU1f602X "emoji"))))
;; A character with no Unicode name is mangled to "U" plus its code point
;; in hexadecimal.
;; NOTE(review): the symbol being bound and compared seems to be missing
;; from the setv and first assert forms in this listing. The expected
;; names (Ue000) suggest it is the private-use character U+E000 — confirm
;; against the real file.
(defn test-nameless-unicode []
(setv "private use")
(assert (= "private use"))
(if PY3
(assert (= hyx_ΔUe000Δ "private use"))
(assert (= hyx_XUe000X "private use"))))
;; When a character's Unicode name contains a hyphen (LESS-THAN SIGN), the
;; hyphen is written as "H" in the mangled name.
(defn test-charname-with-hyphen []
(setv a<b "little")
(assert (= a<b "little"))
(if PY3
(assert (= hyx_aΔlessHthan_signΔb "little"))
(assert (= hyx_aXlessHthan_signXb "little"))))
;; The delimiter character itself (Δ on Python 3, X otherwise) is spelled
;; out by name when it occurs in a symbol that needs hyx_ mangling, while
;; the other platform's delimiter passes through as an ordinary letter.
;; The ✈ (airplane) and ☠ (skull and crossbones) characters had been
;; dropped from this listing; they are restored here to match the mangled
;; names that the assertions expect.
(defn test-delimiters []
  (setv Δ✈ "Delta Air Lines")
  (assert (= Δ✈ "Delta Air Lines"))
  (if PY3
    (assert (= hyx_Δgreek_capital_letter_deltaΔΔairplaneΔ "Delta Air Lines"))
    (assert (= hyx_Xgreek_capital_letter_deltaXXairplaneX "Delta Air Lines")))
  (setv X☠ "treasure")
  (if PY3
    (assert (= hyx_XΔskull_and_crossbonesΔ "treasure"))
    (assert (= hyx_Xlatin_capital_letter_xXXskull_and_crossbonesX "treasure"))))
;; Tag macro whose expansion is a two-element list holding its argument
;; form twice.
(deftag tm---x [form]
[form form])
;; The tag macro can be invoked with hyphens (#tm---x) or underscores
;; (#tm___x). Because the expansion repeats the form, each `+=` runs twice,
;; which is why x ends up as "aabb".
(defn test-tag-macro []
(setv x "")
(assert (= #tm---x (do (+= x "a") 1) [1 1]))
(assert (= #tm___x (do (+= x "b") 2) [2 2]))
(assert (= x "aabb")))
;; Quoted symbols keep their original spelling: 'foo? differs from 'is_foo
;; and compares equal to the string "foo?". Once code containing the symbol
;; is evaluated, foo? and is_foo refer to the same variable.
(defn test-late-mangling []
; Mangling should only happen during compilation.
(assert (!= 'foo? 'is_foo))
(setv sym 'foo?)
(assert (= sym "foo?"))
(assert (!= sym "is_foo"))
(setv out (eval `(do
(setv ~sym 10)
[foo? is_foo])))
(assert (= out [10 10])))

View File

@ -3,6 +3,7 @@
;; license. See the LICENSE.
(import [hy.errors [HyTypeError]])
(import [hy.lex.parser [hy-symbol-mangle]])
(defmacro rev [&rest body]
"Execute the `body` statements in reverse"
@ -162,9 +163,9 @@
(setv _ast2 (import_buffer_to_ast macro1 "foo"))
(setv s1 (to_source _ast1))
(setv s2 (to_source _ast2))
;; and make sure there is something new that starts with :G_
(assert (in "_;G|" s1))
(assert (in "_;G|" s2))
;; and make sure there is something new that starts with _;G|
(assert (in (hy-symbol-mangle "_;G|") s1))
(assert (in (hy-symbol-mangle "_;G|") s2))
;; but make sure the two don't match each other
(assert (not (= s1 s2))))
@ -188,8 +189,8 @@
(setv _ast2 (import_buffer_to_ast macro1 "foo"))
(setv s1 (to_source _ast1))
(setv s2 (to_source _ast2))
(assert (in "_;a|" s1))
(assert (in "_;a|" s2))
(assert (in (hy-symbol-mangle "_;a|") s1))
(assert (in (hy-symbol-mangle "_;a|") s2))
(assert (not (= s1 s2))))
(defn test-defmacro-g! []

View File

@ -343,24 +343,6 @@ def test_lex_comment_382():
assert entry == [HySymbol("foo")]
def test_unmangle():
    """Check hy_symbol_unmangle against a table of mangled/unmangled pairs."""
    import sys
    f = sys.modules["hy.lex.parser"].hy_symbol_unmangle

    # (mangled input, expected unmangled output), checked in this order.
    cases = (
        ("FOO", "*foo*"),
        ("<", "<"),
        ("FOOa", "FOOa"),
        ("foo_bar", "foo-bar"),
        ("_", "_"),
        ("is_foo", "foo?"),
        ("is_", "is-"),
        ("foo_bang", "foo!"),
        ("_bang", "-bang"),
    )
    for mangled, expected in cases:
        assert f(mangled) == expected
def test_simple_cons():
"""Check that cons gets tokenized correctly"""
entry = tokenize("(a . b)")[0]