Delay importing the lexer and parser

This speeds up runs of Hy that never need to parse or compile Hy code (e.g., running a Hy program that's already byte-compiled).
This commit is contained in:
Kodi Arfer 2018-08-02 20:20:42 -04:00
parent 99851f7f6b
commit 734cdcd2fd
6 changed files with 88 additions and 85 deletions

View File

@ -15,8 +15,7 @@ import astor.code_gen
import hy
from hy.lex import LexException, PrematureEndOfInput
from hy.lex.parser import mangle
from hy.lex import LexException, PrematureEndOfInput, mangle
from hy.compiler import HyTypeError
from hy.importer import (hy_eval, import_buffer_to_module,
import_file_to_ast, import_file_to_hst,

View File

@ -11,7 +11,7 @@ from hy.model_patterns import (FORM, SYM, KEYWORD, STR, sym, brackets, whole,
from funcparserlib.parser import some, many, oneplus, maybe, NoParseError
from hy.errors import HyCompileError, HyTypeError
from hy.lex.parser import mangle, unmangle
from hy.lex import mangle, unmangle
import hy.macros
from hy._compat import (

View File

@ -19,8 +19,7 @@
(import [collections :as cabc])
(import [collections.abc :as cabc]))
(import [hy.models [HySymbol HyKeyword]])
(import [hy.lex [LexException PrematureEndOfInput tokenize]])
(import [hy.lex.parser [mangle unmangle]])
(import [hy.lex [LexException PrematureEndOfInput tokenize mangle unmangle]])
(import [hy.compiler [HyASTCompiler]])
(import [hy.importer [hy-eval :as eval]])

View File

@ -2,17 +2,19 @@
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.
from rply.errors import LexingError
from __future__ import unicode_literals
import re, unicodedata
from hy._compat import str_type, isidentifier, UCS4
from hy.lex.exceptions import LexException, PrematureEndOfInput # NOQA
from hy.lex.lexer import lexer
from hy.lex.parser import parser
def tokenize(buf):
"""
Tokenize a Lisp file or string buffer into internal Hy objects.
"""
from hy.lex.lexer import lexer
from hy.lex.parser import parser
from rply.errors import LexingError
try:
return parser.parse(lexer.lex(buf))
except LexingError as e:
@ -23,3 +25,80 @@ def tokenize(buf):
if e.source is None:
e.source = buf
raise
mangle_delim = 'X'
def mangle(s):
"""Stringify the argument and convert it to a valid Python identifier
according to Hy's mangling rules."""
def unicode_char_to_hex(uchr):
# Covert a unicode char to hex string, without prefix
return uchr.encode('unicode-escape').decode('utf-8').lstrip('\\U').lstrip('\\u').lstrip('0')
assert s
s = str_type(s)
s = s.replace("-", "_")
s2 = s.lstrip('_')
leading_underscores = '_' * (len(s) - len(s2))
s = s2
if s.endswith("?"):
s = 'is_' + s[:-1]
if not isidentifier(leading_underscores + s):
# Replace illegal characters with their Unicode character
# names, or hexadecimal if they don't have one.
s = 'hyx_' + ''.join(
c
if c != mangle_delim and isidentifier('S' + c)
# We prepend the "S" because some characters aren't
# allowed at the start of an identifier.
else '{0}{1}{0}'.format(mangle_delim,
unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
or 'U{}'.format(unicode_char_to_hex(c)))
for c in unicode_to_ucs4iter(s))
s = leading_underscores + s
assert isidentifier(s)
return s
def unmangle(s):
"""Stringify the argument and try to convert it to a pretty unmangled
form. This may not round-trip, because different Hy symbol names can
mangle to the same Python identifier."""
s = str_type(s)
s2 = s.lstrip('_')
leading_underscores = len(s) - len(s2)
s = s2
if s.startswith('hyx_'):
s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim),
lambda mo:
chr(int(mo.group(2), base=16))
if mo.group(1)
else unicodedata.lookup(
mo.group(2).replace('_', ' ').replace('H', '-').upper()),
s[len('hyx_'):])
if s.startswith('is_'):
s = s[len("is_"):] + "?"
s = s.replace('_', '-')
return '-' * leading_underscores + s
def unicode_to_ucs4iter(ustr):
# Covert a unicode string to an iterable object,
# elements in the object are single USC-4 unicode characters
if UCS4:
return ustr
ucs4_list = list(ustr)
for i, u in enumerate(ucs4_list):
if 0xD7FF < ord(u) < 0xDC00:
ucs4_list[i] += ucs4_list[i + 1]
del ucs4_list[i + 1]
return ucs4_list

View File

@ -10,7 +10,7 @@ import re, unicodedata
from rply import ParserGenerator
from hy._compat import str_type, isidentifier, UCS4
from hy._compat import str_type
from hy.models import (HyBytes, HyComplex, HyDict, HyExpression, HyFloat,
HyInteger, HyKeyword, HyList, HySet, HyString, HySymbol)
from .lexer import lexer
@ -19,80 +19,6 @@ from .exceptions import LexException, PrematureEndOfInput
pg = ParserGenerator([rule.name for rule in lexer.rules] + ['$end'])
mangle_delim = 'X'
def unicode_to_ucs4iter(ustr):
# Covert a unicode string to an iterable object,
# elements in the object are single USC-4 unicode characters
if UCS4:
return ustr
ucs4_list = list(ustr)
for i, u in enumerate(ucs4_list):
if 0xD7FF < ord(u) < 0xDC00:
ucs4_list[i] += ucs4_list[i + 1]
del ucs4_list[i + 1]
return ucs4_list
def mangle(s):
"""Stringify the argument and convert it to a valid Python identifier
according to Hy's mangling rules."""
def unicode_char_to_hex(uchr):
# Covert a unicode char to hex string, without prefix
return uchr.encode('unicode-escape').decode('utf-8').lstrip('\\U').lstrip('\\u').lstrip('0')
assert s
s = str_type(s)
s = s.replace("-", "_")
s2 = s.lstrip('_')
leading_underscores = '_' * (len(s) - len(s2))
s = s2
if s.endswith("?"):
s = 'is_' + s[:-1]
if not isidentifier(leading_underscores + s):
# Replace illegal characters with their Unicode character
# names, or hexadecimal if they don't have one.
s = 'hyx_' + ''.join(
c
if c != mangle_delim and isidentifier('S' + c)
# We prepend the "S" because some characters aren't
# allowed at the start of an identifier.
else '{0}{1}{0}'.format(mangle_delim,
unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
or 'U{}'.format(unicode_char_to_hex(c)))
for c in unicode_to_ucs4iter(s))
s = leading_underscores + s
assert isidentifier(s)
return s
def unmangle(s):
"""Stringify the argument and try to convert it to a pretty unmangled
form. This may not round-trip, because different Hy symbol names can
mangle to the same Python identifier."""
s = str_type(s)
s2 = s.lstrip('_')
leading_underscores = len(s) - len(s2)
s = s2
if s.startswith('hyx_'):
s = re.sub('{0}(U)?([_a-z0-9H]+?){0}'.format(mangle_delim),
lambda mo:
chr(int(mo.group(2), base=16))
if mo.group(1)
else unicodedata.lookup(
mo.group(2).replace('_', ' ').replace('H', '-').upper()),
s[len('hyx_'):])
if s.startswith('is_'):
s = s[len("is_"):] + "?"
s = s.replace('_', '-')
return '-' * leading_underscores + s
def set_boundaries(fun):
@wraps(fun)

View File

@ -5,7 +5,7 @@
from hy._compat import PY3
import hy.inspect
from hy.models import replace_hy_obj, HyExpression, HySymbol, wrap_value
from hy.lex.parser import mangle
from hy.lex import mangle
from hy._compat import str_type
from hy.errors import HyTypeError, HyMacroExpansionError