Delay importing the lexer and parser

This speeds up runs of Hy that never need to parse or compile Hy code (e.g., running a Hy program that's already byte-compiled).
2018-08-02 20:20:42 -04:00 · 2018-08-02 20:20:42 -04:00 · 734cdcd2fd
commit 734cdcd2fd
parent 99851f7f6b
6 changed files with 88 additions and 85 deletions
--- a/hy/cmdline.py
+++ b/hy/cmdline.py
@@ -15,8 +15,7 @@ import astor.code_gen

 import hy

-from hy.lex import LexException, PrematureEndOfInput
-from hy.lex.parser import mangle
+from hy.lex import LexException, PrematureEndOfInput, mangle
 from hy.compiler import HyTypeError
 from hy.importer import (hy_eval, import_buffer_to_module,
                         import_file_to_ast, import_file_to_hst,
--- a/hy/compiler.py
+++ b/hy/compiler.py
@@ -11,7 +11,7 @@ from hy.model_patterns import (FORM, SYM, KEYWORD, STR, sym, brackets, whole,
 from funcparserlib.parser import some, many, oneplus, maybe, NoParseError
 from hy.errors import HyCompileError, HyTypeError

-from hy.lex.parser import mangle, unmangle
+from hy.lex import mangle, unmangle

 import hy.macros
 from hy._compat import (
--- a/hy/core/language.hy
+++ b/hy/core/language.hy
@@ -19,8 +19,7 @@
  (import [collections :as cabc])
  (import [collections.abc :as cabc]))
 (import [hy.models [HySymbol HyKeyword]])
-(import [hy.lex [LexException PrematureEndOfInput tokenize]])
-(import [hy.lex.parser [mangle unmangle]])
+(import [hy.lex [LexException PrematureEndOfInput tokenize mangle unmangle]])
 (import [hy.compiler [HyASTCompiler]])
 (import [hy.importer [hy-eval :as eval]])

--- a/hy/lex/__init__.py
+++ b/hy/lex/__init__.py
@@ -2,17 +2,19 @@
 # This file is part of Hy, which is free software licensed under the Expat
 # license. See the LICENSE.

-from rply.errors import LexingError
+from __future__ import unicode_literals

+import re, unicodedata
+from hy._compat import str_type, isidentifier, UCS4
 from hy.lex.exceptions import LexException, PrematureEndOfInput  # NOQA
-from hy.lex.lexer import lexer
-from hy.lex.parser import parser
-

 def tokenize(buf):
    """
    Tokenize a Lisp file or string buffer into internal Hy objects.
    """
+    from hy.lex.lexer import lexer
+    from hy.lex.parser import parser
+    from rply.errors import LexingError
    try:
        return parser.parse(lexer.lex(buf))
    except LexingError as e:
@@ -23,3 +25,80 @@ def tokenize(buf):
        if e.source is None:
            e.source = buf
        raise
+
+
+mangle_delim = 'X'
+
+
+def mangle(s):
+    """Stringify the argument and convert it to a valid Python identifier
+    according to Hy's mangling rules."""
+    def unicode_char_to_hex(uchr):
+        # Convert a unicode char to hex string, without prefix
+        return uchr.encode('unicode-escape').decode('utf-8').lstrip('\\U').lstrip('\\u').lstrip('0')
+
+    assert s
+
+    s = str_type(s)
+    s = s.replace("-", "_")
+    s2 = s.lstrip('_')
+    leading_underscores = '_' * (len(s) - len(s2))
+    s = s2
+
+    if s.endswith("?"):
+        s = 'is_' + s[:-1]
+    if not isidentifier(leading_underscores + s):
+        # Replace illegal characters with their Unicode character
+        # names, or hexadecimal if they don't have one.
+        s = 'hyx_' + ''.join(
+            c
+               if c != mangle_delim and isidentifier('S' + c)
+                 # We prepend the "S" because some characters aren't
+                 # allowed at the start of an identifier.
+               else '{0}{1}{0}'.format(mangle_delim,
+                   unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
+                   or 'U{}'.format(unicode_char_to_hex(c)))
+            for c in unicode_to_ucs4iter(s))
+
+    s = leading_underscores + s
+    assert isidentifier(s)
+    return s
+
+
+def unmangle(s):
+    """Stringify the argument and try to convert it to a pretty unmangled
+    form. This may not round-trip, because different Hy symbol names can
+    mangle to the same Python identifier."""
+
+    s = str_type(s)
+
+    s2 = s.lstrip('_')
+    leading_underscores = len(s) - len(s2)
+    s = s2
+
+    if s.startswith('hyx_'):
+        s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim),
+            lambda mo:
+               chr(int(mo.group(2), base=16))
+               if mo.group(1)
+               else unicodedata.lookup(
+                   mo.group(2).replace('_', ' ').replace('H', '-').upper()),
+            s[len('hyx_'):])
+    if s.startswith('is_'):
+        s = s[len("is_"):] + "?"
+    s = s.replace('_', '-')
+
+    return '-' * leading_underscores + s
+
+
+def unicode_to_ucs4iter(ustr):
+    # Convert a unicode string to an iterable object,
+    # elements in the object are single UCS-4 unicode characters
+    if UCS4:
+        return ustr
+    ucs4_list = list(ustr)
+    for i, u in enumerate(ucs4_list):
+        if 0xD7FF < ord(u) < 0xDC00:
+            ucs4_list[i] += ucs4_list[i + 1]
+            del ucs4_list[i + 1]
+    return ucs4_list
--- a/hy/lex/parser.py
+++ b/hy/lex/parser.py
@@ -10,7 +10,7 @@ import re, unicodedata

 from rply import ParserGenerator

-from hy._compat import str_type, isidentifier, UCS4
+from hy._compat import str_type
 from hy.models import (HyBytes, HyComplex, HyDict, HyExpression, HyFloat,
                       HyInteger, HyKeyword, HyList, HySet, HyString, HySymbol)
 from .lexer import lexer
@@ -19,80 +19,6 @@ from .exceptions import LexException, PrematureEndOfInput

 pg = ParserGenerator([rule.name for rule in lexer.rules] + ['$end'])

-mangle_delim = 'X'
-
-def unicode_to_ucs4iter(ustr):
-    # Convert a unicode string to an iterable object,
-    # elements in the object are single UCS-4 unicode characters
-    if UCS4:
-        return ustr
-    ucs4_list = list(ustr)
-    for i, u in enumerate(ucs4_list):
-        if 0xD7FF < ord(u) < 0xDC00:
-            ucs4_list[i] += ucs4_list[i + 1]
-            del ucs4_list[i + 1]
-    return ucs4_list
-
-def mangle(s):
-    """Stringify the argument and convert it to a valid Python identifier
-    according to Hy's mangling rules."""
-    def unicode_char_to_hex(uchr):
-        # Convert a unicode char to hex string, without prefix
-        return uchr.encode('unicode-escape').decode('utf-8').lstrip('\\U').lstrip('\\u').lstrip('0')
-
-    assert s
-
-    s = str_type(s)
-    s = s.replace("-", "_")
-    s2 = s.lstrip('_')
-    leading_underscores = '_' * (len(s) - len(s2))
-    s = s2
-
-    if s.endswith("?"):
-        s = 'is_' + s[:-1]
-    if not isidentifier(leading_underscores + s):
-        # Replace illegal characters with their Unicode character
-        # names, or hexadecimal if they don't have one.
-        s = 'hyx_' + ''.join(
-            c
-               if c != mangle_delim and isidentifier('S' + c)
-                 # We prepend the "S" because some characters aren't
-                 # allowed at the start of an identifier.
-               else '{0}{1}{0}'.format(mangle_delim,
-                   unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
-                   or 'U{}'.format(unicode_char_to_hex(c)))
-            for c in unicode_to_ucs4iter(s))
-
-    s = leading_underscores + s
-    assert isidentifier(s)
-    return s
-
-
-def unmangle(s):
-    """Stringify the argument and try to convert it to a pretty unmangled
-    form. This may not round-trip, because different Hy symbol names can
-    mangle to the same Python identifier."""
-
-    s = str_type(s)
-
-    s2 = s.lstrip('_')
-    leading_underscores = len(s) - len(s2)
-    s = s2
-
-    if s.startswith('hyx_'):
-        s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim),
-            lambda mo:
-               chr(int(mo.group(2), base=16))
-               if mo.group(1)
-               else unicodedata.lookup(
-                   mo.group(2).replace('_', ' ').replace('H', '-').upper()),
-            s[len('hyx_'):])
-    if s.startswith('is_'):
-        s = s[len("is_"):] + "?"
-    s = s.replace('_', '-')
-
-    return '-' * leading_underscores + s
-

 def set_boundaries(fun):
    @wraps(fun)
--- a/hy/macros.py
+++ b/hy/macros.py
@@ -5,7 +5,7 @@
 from hy._compat import PY3
 import hy.inspect
 from hy.models import replace_hy_obj, HyExpression, HySymbol, wrap_value
-from hy.lex.parser import mangle
+from hy.lex import mangle
 from hy._compat import str_type

 from hy.errors import HyTypeError, HyMacroExpansionError