Automatically read and write bytecode

Importing or executing a Hy file now loads the byte-compiled version if it exists and is up to date, and if not, the source is byte-compiled after it's parsed.

This change can speed up Hy a lot. Here are some examples comparing run times of the current master (491b474e) to this commit, on my laptop with Python 3.6:

- `nosetests --exclude='test_bin'` goes from 3.8 s to 0.7 s (a 5-fold speedup)
- `hy -c '(print "hello world")'` goes from 0.47 s to 0.20 s (a 2-fold speedup)
- Rogue TV's startup goes from 3.6 s to 0.4 s (a 9-fold speedup)

Accompanying changes include:

- `setup.py` now creates and installs bytecode for `hy.core`, `hy.contrib`, and `hy.extra`.
- The `hyc` command under Python 3 now creates bytecode in `__pycache__`, as usual for Python 3, instead of putting the `.pyc` right next to the source file like Python 2 does.

I've removed a test of `hy.extra.anaphoric.ap-if` that triggers #1268 when the test file is byte-compiled. When I try to work around that, the test hits what looks like a `macroexpand` bug: Nose crashes while trying to produce an error message, and I can't seem to replicate the bug without Nose.
This commit is contained in:
Kodi Arfer 2017-04-09 17:27:51 -07:00
parent 7a53fdb180
commit 2b11b9be20
9 changed files with 213 additions and 79 deletions

4
NEWS
View File

@ -4,6 +4,10 @@ Changes from 0.12.1
* `let` has been removed. Python's scoping rules do not make a proper
implementation of it possible. Use `setv` instead.
* `lambda` has been removed, but `fn` now does exactly what `lambda` did
* Importing or executing a Hy file automatically byte-compiles it, or loads
a byte-compiled version if it exists and is up to date. This brings big
speed boosts, even for one-liners, because Hy no longer needs to recompile
its standard library for every startup.
* Added bytestring literals, which create `bytes` objects under Python 3
and `str` objects under Python 2
* Commas and underscores are allowed in numeric literals

View File

@ -25,14 +25,16 @@ from hy.lex import tokenize, LexException
from hy.errors import HyIOError
from io import open
import re
import marshal
import struct
import imp
import sys
import ast
import os
import __future__
from hy._compat import PY3, PY33, MAGIC, builtins, long_type, wr_long
from hy._compat import PY3, PY33, PY34, MAGIC, builtins, long_type, wr_long
from hy._compat import string_types
@ -68,25 +70,77 @@ def import_file_to_ast(fpath, module_name):
return hy_compile(import_file_to_hst(fpath), module_name)
def import_file_to_module(module_name, fpath):
"""Import content from fpath and puts it into a Python module.
def import_file_to_module(module_name, fpath, loader=None):
"""Import Hy source from fpath and put it into a Python module.
Returns the module."""
If there's an up-to-date byte-compiled version of this module, load that
instead. Otherwise, byte-compile the module once we're done loading it, if
we can.
Return the module."""
module = None
bytecode_path = get_bytecode_path(fpath)
try:
_ast = import_file_to_ast(fpath, module_name)
mod = imp.new_module(module_name)
mod.__file__ = fpath
eval(ast_compile(_ast, fpath, "exec"), mod.__dict__)
except (HyTypeError, LexException) as e:
if e.source is None:
with open(fpath, 'rt') as fp:
e.source = fp.read()
e.filename = fpath
raise
except Exception:
sys.modules.pop(module_name, None)
raise
return mod
source_mtime = int(os.stat(fpath).st_mtime)
with open(bytecode_path, 'rb') as bc_f:
# To get the bytecode file's internal timestamp, take the 4 bytes
# after the first 4 bytes and interpret them as a little-endian
# 32-bit integer.
bytecode_mtime = struct.unpack('<i', bc_f.read(8)[4:])[0]
except (IOError, OSError):
pass
else:
if bytecode_mtime >= source_mtime:
# It's a cache hit. Load the byte-compiled version.
if PY3:
# As of Python 3.6, imp.load_compiled still exists, but it's
# deprecated. So let's use SourcelessFileLoader instead.
from importlib.machinery import SourcelessFileLoader
module = (SourcelessFileLoader(module_name, bytecode_path).
load_module(module_name))
else:
module = imp.load_compiled(module_name, bytecode_path)
if not module:
# It's a cache miss, so load from source.
sys.modules[module_name] = None
try:
_ast = import_file_to_ast(fpath, module_name)
module = imp.new_module(module_name)
module.__file__ = fpath
code = ast_compile(_ast, fpath, "exec")
try:
write_code_as_pyc(fpath, code)
except (IOError, OSError):
# We failed to save the bytecode, probably because of a
# permissions issue. The user only asked to import the file, so
# don't bug them about it.
pass
eval(code, module.__dict__)
except (HyTypeError, LexException) as e:
if e.source is None:
with open(fpath, 'rt') as fp:
e.source = fp.read()
e.filename = fpath
raise
except Exception:
sys.modules.pop(module_name, None)
raise
sys.modules[module_name] = module
module.__file__ = fpath
module.__name__ = module_name
if loader:
module.__loader__ = loader
if is_package(module_name):
module.__path__ = []
module.__package__ = module_name
else:
module.__package__ = module_name.rpartition('.')[0]
return module
def import_buffer_to_module(module_name, buf):
@ -138,47 +192,34 @@ def hy_eval(hytree, namespace, module_name, ast_callback=None):
def write_hy_as_pyc(fname):
with open(fname, 'U') as f:
try:
st = os.fstat(f.fileno())
except AttributeError:
st = os.stat(fname)
timestamp = long_type(st.st_mtime)
_ast = import_file_to_ast(fname,
os.path.basename(os.path.splitext(fname)[0]))
code = ast_compile(_ast, fname, "exec")
cfile = "%s.pyc" % fname[:-len(".hy")]
write_code_as_pyc(fname, code)
open_ = builtins.open
with open_(cfile, 'wb') as fc:
if PY3:
fc.write(b'\0\0\0\0')
else:
fc.write('\0\0\0\0')
def write_code_as_pyc(fname, code):
st = os.stat(fname)
timestamp = long_type(st.st_mtime)
cfile = get_bytecode_path(fname)
try:
os.makedirs(os.path.dirname(cfile))
except (IOError, OSError):
pass
with builtins.open(cfile, 'wb') as fc:
fc.write(MAGIC)
wr_long(fc, timestamp)
if PY33:
wr_long(fc, st.st_size)
marshal.dump(code, fc)
fc.flush()
fc.seek(0, 0)
fc.write(MAGIC)
class MetaLoader(object):
def __init__(self, path):
self.path = path
def is_package(self, fullname):
dirpath = "/".join(fullname.split("."))
for pth in sys.path:
pth = os.path.abspath(pth)
composed_path = "%s/%s/__init__.hy" % (pth, dirpath)
if os.path.exists(composed_path):
return True
return False
def load_module(self, fullname):
if fullname in sys.modules:
return sys.modules[fullname]
@ -186,24 +227,7 @@ class MetaLoader(object):
if not self.path:
return
sys.modules[fullname] = None
mod = import_file_to_module(fullname,
self.path)
ispkg = self.is_package(fullname)
mod.__file__ = self.path
mod.__loader__ = self
mod.__name__ = fullname
if ispkg:
mod.__path__ = []
mod.__package__ = fullname
else:
mod.__package__ = fullname.rpartition('.')[0]
sys.modules[fullname] = mod
return mod
return import_file_to_module(fullname, self.path, self)
class MetaImporter(object):
@ -226,3 +250,24 @@ class MetaImporter(object):
sys.meta_path.insert(0, MetaImporter())
sys.path.insert(0, "")
def is_package(module_name):
    """Tell whether the dotted name `module_name` names a Hy package.

    A name counts as a package when some entry of `sys.path` contains the
    matching directory with an `__init__.hy` file inside it."""
    init_relpath = os.path.join(*(module_name.split(".") + ["__init__.hy"]))
    return any(
        os.path.exists(os.path.join(os.path.abspath(entry), init_relpath))
        for entry in sys.path)
def get_bytecode_path(source_path):
    """Return the path of the bytecode file that belongs to `source_path`.

    When the running Python provides a `cache_from_source` function
    (`importlib.util` if `PY34` is set, otherwise `imp` if it has one), that
    function decides the location. Otherwise the ".pyc" file is placed right
    next to the source file."""
    if PY34:
        import importlib.util
        return importlib.util.cache_from_source(source_path)
    elif hasattr(imp, "cache_from_source"):
        return imp.cache_from_source(source_path)
    else:
        # If source_path has a file extension, replace it with ".pyc".
        # Otherwise, just append ".pyc".
        d, f = os.path.split(source_path)
        # The pattern can also match the empty string at the very end of f.
        # Limit the substitution to the first match so that an interpreter
        # which replaces an empty match adjacent to a previous match cannot
        # turn "name.hy" into "name.pyc.pyc".
        return os.path.join(d, re.sub(r"(?:\.[^.]+)?\Z", ".pyc", f, count=1))

View File

@ -19,3 +19,6 @@ omit =
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
# We want ignore_errors so we don't get NoSource warnings for loading
# byte-compiled Hy modules.
ignore_errors = True

View File

@ -26,6 +26,7 @@ import runpy
import subprocess
from setuptools import find_packages, setup
from setuptools.command.install import install
os.chdir(os.path.split(os.path.abspath(__file__))[0])
@ -48,6 +49,17 @@ long_description = """Hy is a Python <--> Lisp layer. It helps
make things work nicer, and lets Python and the Hy lisp variant play
nice together. """
class Install(install):
    """`install` command that imports Hy's own `.hy` modules first."""

    def run(self):
        """Import every `.hy` module found under `hy`, then run `install`."""
        # Import each Hy module to ensure it's compiled.
        import importlib
        import os
        for dirpath, _, filenames in os.walk("hy"):
            # os.walk joins path components with os.sep, which is not "/" on
            # every platform, so split on os.sep to build the dotted name.
            package = ".".join(dirpath.split(os.sep))
            for filename in filenames:
                if filename.endswith(".hy"):
                    importlib.import_module(
                        package + "." + filename[:-len(".hy")])
        install.run(self)
install_requires = ['rply>=0.7.0', 'astor>=0.5', 'clint>=0.4']
if sys.version_info[:2] < (2, 7):
install_requires.append('argparse>=1.2.1')
@ -61,6 +73,7 @@ setup(
name=PKG,
version=__version__,
install_requires=install_requires,
cmdclass=dict(install=Install),
entry_points={
'console_scripts': [
'hy = hy.cmdline:hy_main',
@ -73,9 +86,9 @@ setup(
},
packages=find_packages(exclude=['tests*']),
package_data={
'hy.contrib': ['*.hy'],
'hy.core': ['*.hy'],
'hy.extra': ['*.hy'],
'hy.contrib': ['*.hy', '__pycache__/*'],
'hy.core': ['*.hy', '__pycache__/*'],
'hy.extra': ['*.hy', '__pycache__/*'],
},
author="Paul Tagliamonte",
author_email="tag@pault.ag",

View File

@ -1,7 +1,9 @@
from hy.importer import import_file_to_module, import_buffer_to_ast, MetaLoader
from hy.importer import (import_file_to_module, import_buffer_to_ast,
MetaLoader, get_bytecode_path)
from hy.errors import HyTypeError
import os
import ast
import tempfile
def test_basics():
@ -11,7 +13,6 @@ def test_basics():
def test_stringer():
"Make sure the basics of the importer work"
_ast = import_buffer_to_ast("(defn square [x] (* x x))", '')
assert type(_ast.body[0]) == ast.FunctionDef
@ -41,3 +42,21 @@ def test_import_error_reporting():
assert _import_error_test() == "Error reported"
assert _import_error_test() is not None
def test_import_autocompiles():
    "Test that (import) byte-compiles the module."

    f = tempfile.NamedTemporaryFile(suffix='.hy', delete=False)
    f.write(b'(defn pyctest [s] (+ "X" s "Y"))')
    f.close()
    bytecode_path = get_bytecode_path(f.name)

    try:
        # Start from a clean slate: no stale bytecode for this source file.
        try:
            os.remove(bytecode_path)
        except (IOError, OSError):
            pass
        import_file_to_module("mymodule", f.name)
        assert os.path.exists(bytecode_path)
    finally:
        # Clean up even when the import or the assertion fails, so a failing
        # run doesn't leave temporary files behind.
        for path in (f.name, bytecode_path):
            try:
                os.remove(path)
            except (IOError, OSError):
                pass

View File

@ -1,20 +1,20 @@
import os
import imp
import tempfile
from hy.importer import write_hy_as_pyc
from hy.importer import write_hy_as_pyc, get_bytecode_path
def test_pyc():
"""Test pyc compilation."""
f = tempfile.NamedTemporaryFile(suffix='.hy', delete=False)
f.write(b'(defn pyctest [s] s)')
f.write(b'(defn pyctest [s] (+ "X" s "Y"))')
f.close()
write_hy_as_pyc(f.name)
os.unlink(f.name)
os.remove(f.name)
cfile = "%s.pyc" % f.name[:-len(".hy")]
cfile = get_bytecode_path(f.name)
mod = imp.load_compiled('pyc', cfile)
os.unlink(cfile)
os.remove(cfile)
assert mod.pyctest('Foo') == 'Foo'
assert mod.pyctest('Foo') == 'XFooY'

View File

@ -36,10 +36,7 @@
(defn test-ap-if []
"NATIVE: testing anaphoric if"
(ap-if True (assert-true it))
(ap-if False True (assert-false it))
(try (macroexpand '(ap-if True))
(except [HyMacroExpansionError] True)
(else (assert False))))
(ap-if False True (assert-false it)))
(defn test-ap-each []
"NATIVE: testing anaphoric each"

View File

@ -0,0 +1,5 @@
(defmacro m []
(print "Hello from macro")
"boink")
(print "The macro returned:" (m))

View File

@ -25,6 +25,7 @@ import os
import subprocess
import re
from hy._compat import PY3
from hy.importer import get_bytecode_path
hy_dir = os.environ.get('HY_DIR', '')
@ -55,6 +56,16 @@ def run_cmd(cmd, stdin_data=None, expect=0):
return stdout, stderr
def rm(fpath):
    """Delete `fpath`, whether it is a file or an empty directory.

    Failures are ignored: if `fpath` can be removed neither as a file nor as
    a directory, this quietly does nothing."""
    for delete in (os.remove, os.rmdir):
        try:
            delete(fpath)
        except (IOError, OSError):
            # Didn't work; fall through to the next way of deleting, if any.
            continue
        return
def test_bin_hy():
run_cmd("hy", "")
@ -190,9 +201,11 @@ def test_bin_hyc():
output, _ = run_cmd("hyc -h")
assert "usage" in output
output, _ = run_cmd("hyc tests/resources/argparse_ex.hy")
path = "tests/resources/argparse_ex.hy"
output, _ = run_cmd("hyc " + path)
assert "Compiling" in output
assert os.path.exists("tests/resources/argparse_ex.pyc")
assert os.path.exists(get_bytecode_path(path))
rm(get_bytecode_path(path))
def test_bin_hyc_missing_file():
@ -243,6 +256,41 @@ def test_bin_hy_no_main():
assert "This Should Still Work" in output
def test_bin_hy_byte_compile():
    """Check that running or importing a module byte-compiles it when it can.

    Each way of running the module is tried twice: once with nothing in the
    way, and once with a directory sitting at the bytecode path so that the
    bytecode can't be written."""

    modname = "tests.resources.bin.bytecompile"
    fpath = modname.replace(".", "/") + ".hy"
    bytecode_path = get_bytecode_path(fpath)

    try:
        for can_byte_compile in [True, False]:
            for cmd in ["hy " + fpath,
                        "hy -m " + modname,
                        "hy -c '(import {})'".format(modname)]:

                rm(bytecode_path)

                if not can_byte_compile:
                    # Keep Hy from being able to byte-compile the module by
                    # creating a directory at the target location.
                    os.mkdir(bytecode_path)

                # Whether or not we can byte-compile the module, we should be
                # able to run it.
                output, _ = run_cmd(cmd)
                assert "Hello from macro" in output
                assert "The macro returned: boink" in output

                if can_byte_compile:
                    # That should've byte-compiled the module.
                    assert os.path.exists(bytecode_path)

                # When we run the same command again, and we've byte-compiled
                # the module, the byte-compiled version should be run instead
                # of the source, in which case the macro shouldn't be run.
                output, _ = run_cmd(cmd)
                assert ("Hello from macro" in output) ^ can_byte_compile
                assert "The macro returned: boink" in output
    finally:
        # The last iteration leaves a directory (or, after a failure, maybe a
        # bytecode file) at the bytecode path; don't leave it lying around.
        rm(bytecode_path)
def test_bin_hy_module_main():
output, _ = run_cmd("hy -m tests.resources.bin.main")
assert "Hello World" in output