Mangle symbols at compile-time instead of parse-time

This means that a HySymbol remembers its original name. That is, `a-b` and `a_b` are different symbols, even though `(setv a-b 1)` and `(setv a_b 1)` set the same variable (namely, `a_b`).

Most of the edits in this commit switch underscores to hyphens in places where mangling hasn't happened yet.

I removed some lexer tests since the lexer no longer does any mangling.
This commit is contained in:
Kodi Arfer 2018-02-25 16:44:20 -08:00
parent d947a27022
commit d501d4d806
7 changed files with 49 additions and 103 deletions

View File

@ -53,7 +53,7 @@ def load_stdlib():
import hy.core
for module in hy.core.STDLIB:
mod = importlib.import_module(module)
for e in mod.EXPORTS:
for e in map(ast_str, mod.EXPORTS):
if getattr(mod, e) is not getattr(builtins, e, ''):
# Don't bother putting a name in _stdlib if it
# points to a builtin with the same name. This
@ -83,16 +83,16 @@ if PY35:
def ast_str(foobar):
if PY3:
return str(foobar)
return hy_symbol_mangle(str(foobar))
try:
return str(foobar)
return str(hy_symbol_mangle(str(foobar)))
except UnicodeEncodeError:
pass
enc = codecs.getencoder('punycode')
foobar, _ = enc(foobar)
return "hy_%s" % (str(foobar).replace("-", "_"))
return "hy_%s" % str(hy_symbol_mangle(foobar))
def builds(*types, **kwargs):
@ -379,7 +379,7 @@ def is_unpack(kind, x):
return (isinstance(x, HyExpression)
and len(x) > 0
and isinstance(x[0], HySymbol)
and x[0] == "unpack_" + kind)
and x[0] == "unpack-" + kind)
def ends_with_else(expr):
@ -699,17 +699,17 @@ class HyASTCompiler(object):
"""
if level == 0:
if isinstance(form, HyExpression):
if form and form[0] in ("unquote", "unquote_splice"):
if form and form[0] in ("unquote", "unquote-splice"):
if len(form) != 2:
raise HyTypeError(form,
("`%s' needs 1 argument, got %s" %
form[0], len(form) - 1))
return set(), form[1], (form[0] == "unquote_splice")
return set(), form[1], (form[0] == "unquote-splice")
if isinstance(form, HyExpression):
if form and form[0] == "quasiquote":
level += 1
if form and form[0] in ("unquote", "unquote_splice"):
if form and form[0] in ("unquote", "unquote-splice"):
level -= 1
name = form.__class__.__name__
@ -783,12 +783,12 @@ class HyASTCompiler(object):
ret.add_imports("hy", imports)
return ret
@builds("unquote", "unquote_splicing")
@builds("unquote", "unquote-splicing")
def compile_unquote(self, expr):
raise HyTypeError(expr,
"`%s' can't be used at the top-level" % expr[0])
@builds("unpack_iterable")
@builds("unpack-iterable")
@checkargs(exact=1)
def compile_unpack_iterable(self, expr):
if not PY3:
@ -797,7 +797,7 @@ class HyASTCompiler(object):
ret += asty.Starred(expr, value=ret.force_expr, ctx=ast.Load())
return ret
@builds("unpack_mapping")
@builds("unpack-mapping")
@checkargs(exact=1)
def compile_unpack_mapping(self, expr):
raise HyTypeError(expr, "`unpack-mapping` isn't allowed here")
@ -1143,12 +1143,12 @@ class HyASTCompiler(object):
ret += self.compile(expr[1])
return ret + asty.Yield(expr, value=ret.force_expr)
@builds("yield_from", iff=PY3)
@builds("yield-from", iff=PY3)
@builds("await", iff=PY35)
@checkargs(1)
def compile_yield_from_or_await_expression(self, expr):
ret = Result() + self.compile(expr[1])
node = asty.YieldFrom if expr[0] == "yield_from" else asty.Await
node = asty.YieldFrom if expr[0] == "yield-from" else asty.Await
return ret + node(expr, value=ret.force_expr)
@builds("import")
@ -1307,7 +1307,7 @@ class HyASTCompiler(object):
slice=ast.Slice(lower=nodes[1], upper=nodes[2], step=nodes[3]),
ctx=ast.Load())
@builds("with_decorator")
@builds("with-decorator")
@checkargs(min=1)
def compile_decorate_expression(self, expr):
expr.pop(0) # with-decorator
@ -1403,7 +1403,7 @@ class HyASTCompiler(object):
return gen_res + cond, gen
@builds("list_comp", "set_comp", "genexpr")
@builds("list-comp", "set-comp", "genexpr")
@checkargs(min=2, max=3)
def compile_comprehension(self, expr):
# (list-comp expr (target iter) cond?)
@ -1421,13 +1421,13 @@ class HyASTCompiler(object):
ret = self.compile(expression)
node_class = (
asty.ListComp if form == "list_comp" else
asty.SetComp if form == "set_comp" else
asty.ListComp if form == "list-comp" else
asty.SetComp if form == "set-comp" else
asty.GeneratorExp)
return ret + gen_res + node_class(
expr, elt=ret.force_expr, generators=gen)
@builds("dict_comp")
@builds("dict-comp")
@checkargs(min=3, max=4)
def compile_dict_comprehension(self, expr):
expr.pop(0) # dict-comp
@ -1558,8 +1558,8 @@ class HyASTCompiler(object):
ops = {"=": ast.Eq, "!=": ast.NotEq,
"<": ast.Lt, "<=": ast.LtE,
">": ast.Gt, ">=": ast.GtE,
"is": ast.Is, "is_not": ast.IsNot,
"in": ast.In, "not_in": ast.NotIn}
"is": ast.Is, "is-not": ast.IsNot,
"in": ast.In, "not-in": ast.NotIn}
inv = expression.pop(0)
ops = [ops[inv]() for _ in range(len(expression) - 1)]
@ -1578,12 +1578,12 @@ class HyASTCompiler(object):
asty.Name(expression, id="True", ctx=ast.Load()))
return self._compile_compare_op_expression(expression)
@builds("!=", "is_not")
@builds("!=", "is-not")
@checkargs(min=2)
def compile_compare_op_expression_coll(self, expression):
return self._compile_compare_op_expression(expression)
@builds("in", "not_in")
@builds("in", "not-in")
@checkargs(2)
def compile_compare_op_expression_binary(self, expression):
return self._compile_compare_op_expression(expression)
@ -1680,7 +1680,7 @@ class HyASTCompiler(object):
def compile_maths_expression_sub(self, expression):
return self._compile_maths_expression_additive(expression)
@builds("+=", "/=", "//=", "*=", "_=", "%=", "**=", "<<=", ">>=", "|=",
@builds("+=", "/=", "//=", "*=", "-=", "%=", "**=", "<<=", ">>=", "|=",
"^=", "&=")
@builds("@=", iff=PY35)
@checkargs(2)
@ -1689,7 +1689,7 @@ class HyASTCompiler(object):
"/=": ast.Div,
"//=": ast.FloorDiv,
"*=": ast.Mult,
"_=": ast.Sub,
"-=": ast.Sub,
"%=": ast.Mod,
"**=": ast.Pow,
"<<=": ast.LShift,
@ -1732,7 +1732,7 @@ class HyASTCompiler(object):
if isinstance(fn, HySymbol):
# First check if `fn` is a special form, unless it has an
# `unpack_iterable` in it, since Python's operators (`+`,
# `unpack-iterable` in it, since Python's operators (`+`,
# etc.) can't unpack. An exception to this exception is that
# tuple literals (`,`) can unpack.
if fn == "," or not (
@ -1785,7 +1785,7 @@ class HyASTCompiler(object):
# An exception for pulling together keyword args is if we're doing
# a typecheck, eg (type :foo)
with_kwargs = fn not in (
"type", "HyKeyword", "keyword", "name", "is_keyword")
"type", "HyKeyword", "keyword", "name", "keyword?")
args, ret, keywords, oldpy_star, oldpy_kw = self._compile_collect(
expression[1:], with_kwargs, oldpy_unpack=True)
@ -2057,7 +2057,7 @@ class HyASTCompiler(object):
pairs = expr[1:]
while len(pairs) > 0:
k, v = (pairs.pop(0), pairs.pop(0))
if k == HySymbol("__init__"):
if ast_str(k) == "__init__":
v.append(HySymbol("None"))
new_args.append(k)
new_args.append(v)
@ -2120,7 +2120,7 @@ class HyASTCompiler(object):
bases=bases_expr,
body=body.stmts)
@builds("dispatch_tag_macro")
@builds("dispatch-tag-macro")
@checkargs(exact=2)
def compile_dispatch_tag_macro(self, expression):
expression.pop(0) # dispatch-tag-macro
@ -2135,14 +2135,14 @@ class HyASTCompiler(object):
expr = tag_macroexpand(tag, expression.pop(0), self)
return self.compile(expr)
@builds("eval_and_compile", "eval_when_compile")
@builds("eval-and-compile", "eval-when-compile")
def compile_eval_and_compile(self, expression, building):
expression[0] = HySymbol("do")
hy.importer.hy_eval(expression,
compile_time_ns(self.module_name),
self.module_name)
return (self._compile_branch(expression[1:])
if building == "eval_and_compile"
if building == "eval-and-compile"
else Result())
@builds(HyCons)
@ -2198,8 +2198,8 @@ class HyASTCompiler(object):
attr=ast_str(local),
ctx=ast.Load())
if symbol in _stdlib:
self.imports[_stdlib[symbol]].add(symbol)
if ast_str(symbol) in _stdlib:
self.imports[_stdlib[ast_str(symbol)]].add(symbol)
return asty.Name(symbol, id=ast_str(symbol), ctx=ast.Load())

View File

@ -75,9 +75,9 @@
'quote "'"
'quasiquote "`"
'unquote "~"
'unquote_splice "~@"
'unpack_iterable "#* "
'unpack_mapping "#** "})
'unquote-splice "~@"
'unpack-iterable "#* "
'unpack-mapping "#** "})
(if (and x (symbol? (first x)) (in (first x) syntax))
(+ (get syntax (first x)) (hy-repr (second x)))
(+ "(" (-cat x) ")"))))

View File

@ -87,7 +87,7 @@ If the second argument `codegen` is true, generate python code instead."
"Return a generator from the original collection `coll` with no duplicates."
(setv seen (set) citer (iter coll))
(for* [val citer]
(if (not_in val seen)
(if (not-in val seen)
(do
(yield val)
(.add seen val)))))

View File

@ -201,7 +201,7 @@ def term_unquote(p):
@pg.production("term : UNQUOTESPLICE term")
@set_quote_boundaries
def term_unquote_splice(p):
return HyExpression([HySymbol("unquote_splice"), p[1]])
return HyExpression([HySymbol("unquote-splice"), p[1]])
@pg.production("term : HASHSTARS term")
@ -209,9 +209,9 @@ def term_unquote_splice(p):
def term_hashstars(p):
n_stars = len(p[0].getstr()[1:])
if n_stars == 1:
sym = "unpack_iterable"
sym = "unpack-iterable"
elif n_stars == 2:
sym = "unpack_mapping"
sym = "unpack-mapping"
else:
raise LexException(
"Too many stars in `#*` construct (if you want to unpack a symbol "
@ -227,7 +227,7 @@ def hash_other(p):
st = p[0].getstr()[1:]
str_object = HyString(st)
expr = p[1]
return HyExpression([HySymbol("dispatch_tag_macro"), str_object, expr])
return HyExpression([HySymbol("dispatch-tag-macro"), str_object, expr])
@pg.production("set : HLCURLY list_contents RCURLY")
@ -307,7 +307,7 @@ def t_identifier(p):
'`(. <expression> <attr>)` or `(.<attr> <expression>)`)',
p[0].source_pos.lineno, p[0].source_pos.colno)
return HySymbol(".".join(hy_symbol_mangle(x) for x in obj.split(".")))
return HySymbol(obj)
def symbol_like(obj):

View File

@ -338,7 +338,7 @@ class HyCons(HyObject):
# Keep unquotes in the cdr of conses
if type(cdr) == HyExpression:
if len(cdr) > 0 and type(cdr[0]) == HySymbol:
if cdr[0] in ("unquote", "unquote_splice"):
if cdr[0] in ("unquote", "unquote-splice"):
return super(HyCons, cls).__new__(cls)
return cdr.__class__([wrap_value(car)] + cdr)

View File

@ -247,9 +247,9 @@
(forbid (f))
(forbid (f "hello"))
(defclass C)
(setv x (get {"is_not" (C) "!=" 0} f-name))
(setv y (get {"is_not" (C) "!=" 1} f-name))
(setv z (get {"is_not" (C) "!=" 2} f-name))
(setv x (get {"is-not" (C) "!=" 0} f-name))
(setv y (get {"is-not" (C) "!=" 1} f-name))
(setv z (get {"is-not" (C) "!=" 2} f-name))
(assert (is (f x x) False))
(assert (is (f y y) False))
(assert (is (f x y) True))

View File

@ -121,8 +121,8 @@ def test_lex_nan_and_inf():
assert tokenize("INF") == [HySymbol("INF")]
assert tokenize("-Inf") == [HyFloat(float("-inf"))]
assert tokenize("-inf") == [HySymbol("_inf")]
assert tokenize("-INF") == [HySymbol("_INF")]
assert tokenize("-inf") == [HySymbol("-inf")]
assert tokenize("-INF") == [HySymbol("-INF")]
def test_lex_expression_complex():
@ -140,7 +140,7 @@ def test_lex_expression_complex():
assert t("nanj") == f(HySymbol("nanj"))
assert t("Inf+Infj") == f(HyComplex(complex(float("inf"), float("inf"))))
assert t("Inf-Infj") == f(HyComplex(complex(float("inf"), float("-inf"))))
assert t("Inf-INFj") == f(HySymbol("Inf_INFj"))
assert t("Inf-INFj") == f(HySymbol("Inf-INFj"))
def test_lex_digit_separators():
@ -332,7 +332,7 @@ def test_complex():
def test_tag_macro():
"""Ensure tag macros are handled properly"""
entry = tokenize("#^()")
assert entry[0][0] == HySymbol("dispatch_tag_macro")
assert entry[0][0] == HySymbol("dispatch-tag-macro")
assert entry[0][1] == HyString("^")
assert len(entry[0]) == 3
@ -343,60 +343,6 @@ def test_lex_comment_382():
assert entry == [HySymbol("foo")]
def test_lex_mangling_star():
"""Ensure that mangling starred identifiers works according to plan"""
entry = tokenize("*foo*")
assert entry == [HySymbol("FOO")]
entry = tokenize("*")
assert entry == [HySymbol("*")]
entry = tokenize("*foo")
assert entry == [HySymbol("*foo")]
def test_lex_mangling_hyphen():
"""Ensure that hyphens get translated to underscores during mangling"""
entry = tokenize("foo-bar")
assert entry == [HySymbol("foo_bar")]
entry = tokenize("-")
assert entry == [HySymbol("-")]
def test_lex_mangling_qmark():
"""Ensure that identifiers ending with a question mark get mangled ok"""
entry = tokenize("foo?")
assert entry == [HySymbol("is_foo")]
entry = tokenize("?")
assert entry == [HySymbol("?")]
entry = tokenize("im?foo")
assert entry == [HySymbol("im?foo")]
entry = tokenize(".foo?")
assert entry == [HySymbol(".is_foo")]
entry = tokenize("foo.bar?")
assert entry == [HySymbol("foo.is_bar")]
entry = tokenize("foo?.bar")
assert entry == [HySymbol("is_foo.bar")]
entry = tokenize(".foo?.bar.baz?")
assert entry == [HySymbol(".is_foo.bar.is_baz")]
def test_lex_mangling_bang():
"""Ensure that identifiers ending with a bang get mangled ok"""
entry = tokenize("foo!")
assert entry == [HySymbol("foo_bang")]
entry = tokenize("!")
assert entry == [HySymbol("!")]
entry = tokenize("im!foo")
assert entry == [HySymbol("im!foo")]
entry = tokenize(".foo!")
assert entry == [HySymbol(".foo_bang")]
entry = tokenize("foo.bar!")
assert entry == [HySymbol("foo.bar_bang")]
entry = tokenize("foo!.bar")
assert entry == [HySymbol("foo_bang.bar")]
entry = tokenize(".foo!.bar.baz!")
assert entry == [HySymbol(".foo_bang.bar.baz_bang")]
def test_unmangle():
import sys
f = sys.modules["hy.lex.parser"].hy_symbol_unmangle