From 734cdcd2fd760febc003b56769bcd2d0c12d6960 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Thu, 2 Aug 2018 20:20:42 -0400 Subject: [PATCH] Delay importing the lexer and parser This speeds up runs of Hy that never need to parse or compile Hy code (e.g., running a Hy program that's already byte-compiled). --- hy/cmdline.py | 3 +- hy/compiler.py | 2 +- hy/core/language.hy | 3 +- hy/lex/__init__.py | 87 ++++++++++++++++++++++++++++++++++++++++++--- hy/lex/parser.py | 76 +-------------------------------------- hy/macros.py | 2 +- 6 files changed, 88 insertions(+), 85 deletions(-) diff --git a/hy/cmdline.py b/hy/cmdline.py index 3f2bb1f..b10028c 100644 --- a/hy/cmdline.py +++ b/hy/cmdline.py @@ -15,8 +15,7 @@ import astor.code_gen import hy -from hy.lex import LexException, PrematureEndOfInput -from hy.lex.parser import mangle +from hy.lex import LexException, PrematureEndOfInput, mangle from hy.compiler import HyTypeError from hy.importer import (hy_eval, import_buffer_to_module, import_file_to_ast, import_file_to_hst, diff --git a/hy/compiler.py b/hy/compiler.py index 7dee28c..f25f615 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -11,7 +11,7 @@ from hy.model_patterns import (FORM, SYM, KEYWORD, STR, sym, brackets, whole, from funcparserlib.parser import some, many, oneplus, maybe, NoParseError from hy.errors import HyCompileError, HyTypeError -from hy.lex.parser import mangle, unmangle +from hy.lex import mangle, unmangle import hy.macros from hy._compat import ( diff --git a/hy/core/language.hy b/hy/core/language.hy index 2895bb7..abb387f 100644 --- a/hy/core/language.hy +++ b/hy/core/language.hy @@ -19,8 +19,7 @@ (import [collections :as cabc]) (import [collections.abc :as cabc])) (import [hy.models [HySymbol HyKeyword]]) -(import [hy.lex [LexException PrematureEndOfInput tokenize]]) -(import [hy.lex.parser [mangle unmangle]]) +(import [hy.lex [LexException PrematureEndOfInput tokenize mangle unmangle]]) (import [hy.compiler [HyASTCompiler]]) (import 
[hy.importer [hy-eval :as eval]]) diff --git a/hy/lex/__init__.py b/hy/lex/__init__.py index 85d6502..5c05143 100644 --- a/hy/lex/__init__.py +++ b/hy/lex/__init__.py @@ -2,17 +2,19 @@ # This file is part of Hy, which is free software licensed under the Expat # license. See the LICENSE. -from rply.errors import LexingError +from __future__ import unicode_literals +import re, unicodedata +from hy._compat import str_type, isidentifier, UCS4 from hy.lex.exceptions import LexException, PrematureEndOfInput # NOQA -from hy.lex.lexer import lexer -from hy.lex.parser import parser - def tokenize(buf): """ Tokenize a Lisp file or string buffer into internal Hy objects. """ + from hy.lex.lexer import lexer + from hy.lex.parser import parser + from rply.errors import LexingError try: return parser.parse(lexer.lex(buf)) except LexingError as e: @@ -23,3 +25,80 @@ def tokenize(buf): if e.source is None: e.source = buf raise + + +mangle_delim = 'X' + + +def mangle(s): + """Stringify the argument and convert it to a valid Python identifier + according to Hy's mangling rules.""" + def unicode_char_to_hex(uchr): + # Convert a unicode char to hex string, without prefix + return uchr.encode('unicode-escape').decode('utf-8').lstrip('\\U').lstrip('\\u').lstrip('0') + + assert s + + s = str_type(s) + s = s.replace("-", "_") + s2 = s.lstrip('_') + leading_underscores = '_' * (len(s) - len(s2)) + s = s2 + + if s.endswith("?"): + s = 'is_' + s[:-1] + if not isidentifier(leading_underscores + s): + # Replace illegal characters with their Unicode character + # names, or hexadecimal if they don't have one. + s = 'hyx_' + ''.join( + c + if c != mangle_delim and isidentifier('S' + c) + # We prepend the "S" because some characters aren't + # allowed at the start of an identifier. 
+ else '{0}{1}{0}'.format(mangle_delim, + unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_') + or 'U{}'.format(unicode_char_to_hex(c))) + for c in unicode_to_ucs4iter(s)) + + s = leading_underscores + s + assert isidentifier(s) + return s + + +def unmangle(s): + """Stringify the argument and try to convert it to a pretty unmangled + form. This may not round-trip, because different Hy symbol names can + mangle to the same Python identifier.""" + + s = str_type(s) + + s2 = s.lstrip('_') + leading_underscores = len(s) - len(s2) + s = s2 + + if s.startswith('hyx_'): + s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim), + lambda mo: + chr(int(mo.group(2), base=16)) + if mo.group(1) + else unicodedata.lookup( + mo.group(2).replace('_', ' ').replace('H', '-').upper()), + s[len('hyx_'):]) + if s.startswith('is_'): + s = s[len("is_"):] + "?" + s = s.replace('_', '-') + + return '-' * leading_underscores + s + + +def unicode_to_ucs4iter(ustr): + # Convert a unicode string to an iterable object, + # elements in the object are single UCS-4 unicode characters + if UCS4: + return ustr + ucs4_list = list(ustr) + for i, u in enumerate(ucs4_list): + if 0xD7FF < ord(u) < 0xDC00: + ucs4_list[i] += ucs4_list[i + 1] + del ucs4_list[i + 1] + return ucs4_list diff --git a/hy/lex/parser.py b/hy/lex/parser.py index 63ea277..a13c793 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -10,7 +10,7 @@ import re, unicodedata from rply import ParserGenerator -from hy._compat import str_type, isidentifier, UCS4 +from hy._compat import str_type from hy.models import (HyBytes, HyComplex, HyDict, HyExpression, HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString, HySymbol) from .lexer import lexer @@ -19,80 +19,6 @@ from .exceptions import LexException, PrematureEndOfInput pg = ParserGenerator([rule.name for rule in lexer.rules] + ['$end']) -mangle_delim = 'X' - -def unicode_to_ucs4iter(ustr): - # Covert a unicode string to an iterable object, - # elements in the object 
are single USC-4 unicode characters - if UCS4: - return ustr - ucs4_list = list(ustr) - for i, u in enumerate(ucs4_list): - if 0xD7FF < ord(u) < 0xDC00: - ucs4_list[i] += ucs4_list[i + 1] - del ucs4_list[i + 1] - return ucs4_list - -def mangle(s): - """Stringify the argument and convert it to a valid Python identifier - according to Hy's mangling rules.""" - def unicode_char_to_hex(uchr): - # Covert a unicode char to hex string, without prefix - return uchr.encode('unicode-escape').decode('utf-8').lstrip('\\U').lstrip('\\u').lstrip('0') - - assert s - - s = str_type(s) - s = s.replace("-", "_") - s2 = s.lstrip('_') - leading_underscores = '_' * (len(s) - len(s2)) - s = s2 - - if s.endswith("?"): - s = 'is_' + s[:-1] - if not isidentifier(leading_underscores + s): - # Replace illegal characters with their Unicode character - # names, or hexadecimal if they don't have one. - s = 'hyx_' + ''.join( - c - if c != mangle_delim and isidentifier('S' + c) - # We prepend the "S" because some characters aren't - # allowed at the start of an identifier. - else '{0}{1}{0}'.format(mangle_delim, - unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_') - or 'U{}'.format(unicode_char_to_hex(c))) - for c in unicode_to_ucs4iter(s)) - - s = leading_underscores + s - assert isidentifier(s) - return s - - -def unmangle(s): - """Stringify the argument and try to convert it to a pretty unmangled - form. This may not round-trip, because different Hy symbol names can - mangle to the same Python identifier.""" - - s = str_type(s) - - s2 = s.lstrip('_') - leading_underscores = len(s) - len(s2) - s = s2 - - if s.startswith('hyx_'): - s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim), - lambda mo: - chr(int(mo.group(2), base=16)) - if mo.group(1) - else unicodedata.lookup( - mo.group(2).replace('_', ' ').replace('H', '-').upper()), - s[len('hyx_'):]) - if s.startswith('is_'): - s = s[len("is_"):] + "?" 
- s = s.replace('_', '-') - - return '-' * leading_underscores + s - def set_boundaries(fun): @wraps(fun) diff --git a/hy/macros.py b/hy/macros.py index 52702ee..c5ab844 100644 --- a/hy/macros.py +++ b/hy/macros.py @@ -5,7 +5,7 @@ from hy._compat import PY3 import hy.inspect from hy.models import replace_hy_obj, HyExpression, HySymbol, wrap_value -from hy.lex.parser import mangle +from hy.lex import mangle from hy._compat import str_type from hy.errors import HyTypeError, HyMacroExpansionError