Add bytestring literals

This commit is contained in:
Kodi Arfer 2017-02-18 16:15:58 -08:00 committed by Tuukka Turto
parent 92d5d6b42c
commit 45b7a4ac9d
10 changed files with 82 additions and 14 deletions

2
NEWS
View File

@ -3,6 +3,8 @@ Changes from 0.12.1
[ Language Changes ]
* `let` has been removed. Python's scoping rules do not make a proper
implementation of it possible. Use `setv` instead.
* Added bytestring literals, which create `bytes` objects under Python 3
and `str` objects under Python 2
* Commas and underscores are allowed in numeric literals
* xor: If exactly one argument is true, return it

View File

@ -52,6 +52,19 @@ digits.
(print 10,000,000,000 10_000_000_000)
string literals
---------------
Unlike Python, Hy allows only double-quoted strings. The single-quote character
is reserved for preventing the evaluation of a form, as in most Lisps.
Whether running under Python 2 or Python 3, Hy treats string literals as
sequences of Unicode characters by default, and allows you to prefix a literal
with ``b`` to treat it as a sequence of bytes. So when running under Python 3,
Hy translates ``"foo"`` and ``b"foo"`` to the identically spelled Python
literals (``"foo"`` and ``b"foo"``, respectively), but when running under
Python 2, ``"foo"`` is translated to ``u"foo"`` and ``b"foo"`` is translated
to ``"foo"``.
Built-Ins
=========

View File

@ -113,6 +113,12 @@ Hy literal strings can span multiple lines, and are considered by the
parser as a single unit, respecting the Python escapes for unicode
strings.
HyBytes
~~~~~~~
``hy.models.HyBytes`` is like ``HyString``, but for sequences of bytes.
It inherits from ``bytes`` on Python 3 and ``str`` on Python 2.
.. _hy_numeric_models:
Numeric Models

View File

@ -49,6 +49,11 @@ if PY3:
else:
str_type = unicode # NOQA
# The native byte-sequence type: ``bytes`` on Python 3, ``str`` otherwise.
bytes_type = bytes if PY3 else str
if PY3:
long_type = int
else:

View File

@ -25,14 +25,15 @@
# DEALINGS IN THE SOFTWARE.
from hy.models import (HyExpression, HyKeyword, HyInteger, HyComplex, HyString,
HySymbol, HyFloat, HyList, HySet, HyDict, HyCons)
HyBytes, HySymbol, HyFloat, HyList, HySet, HyDict,
HyCons)
from hy.errors import HyCompileError, HyTypeError
from hy.lex.parser import hy_symbol_mangle
import hy.macros
from hy._compat import (
str_type, long_type, PY27, PY33, PY3, PY34, PY35, raise_empty)
str_type, bytes_type, long_type, PY27, PY33, PY3, PY34, PY35, raise_empty)
from hy.macros import require, macroexpand, reader_macroexpand
import hy.importer
@ -2641,6 +2642,13 @@ class HyASTCompiler(object):
lineno=string.start_line,
col_offset=string.start_column)
@builds(HyBytes)
def compile_bytes(self, bytestring):
    """Compile a ``HyBytes`` model into a Python AST literal node.

    The node class is ``ast.Bytes`` when ``PY3`` is true and ``ast.Str``
    otherwise. The node's value is the model converted with
    ``bytes_type``, and its position is taken from the model's
    ``start_line`` and ``start_column``.
    """
    node_class = ast.Bytes if PY3 else ast.Str
    return node_class(
        s=bytes_type(bytestring),
        lineno=bytestring.start_line,
        col_offset=bytestring.start_column)
@builds(HyKeyword)
def compile_keyword(self, keyword):
return ast.Str(s=str_type(keyword),

View File

@ -46,7 +46,7 @@ lg.add('HASHREADER', r'#[^{]')
# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
partial_string = r'''(?x)
(?:u|r|ur|ru)? # prefix
(?:u|r|ur|ru|b|br|rb)? # prefix
" # start string
(?:
| [^"\\] # non-quote or backslash

View File

@ -18,13 +18,15 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import sys
from functools import wraps
from ast import literal_eval
from rply import ParserGenerator
from hy.models import (HyComplex, HyCons, HyDict, HyExpression, HyFloat,
HyInteger, HyKeyword, HyList, HySet, HyString, HySymbol)
from hy._compat import PY3, str_type
from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression,
HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString,
HySymbol)
from .lexer import lexer
from .exceptions import LexException, PrematureEndOfInput
@ -55,8 +57,6 @@ def hy_symbol_unmangle(p):
# hy_symbol_mangle is one-way, so this can't be perfect.
# But it can be useful till we have a way to get the original
# symbol (https://github.com/hylang/hy/issues/360).
from hy._compat import str_type
p = str_type(p)
if p.endswith("_bang") and p != "_bang":
@ -258,12 +258,19 @@ def t_empty_list(p):
return HyList([])
if sys.version_info[0] >= 3:
if PY3:
def uni_hystring(s):
return HyString(eval(s))
return HyString(literal_eval(s))
def hybytes(s):
return HyBytes(literal_eval('b'+s))
else:
def uni_hystring(s):
return HyString(eval('u'+s))
return HyString(literal_eval('u'+s))
def hybytes(s):
return HyBytes(literal_eval(s))
@pg.production("string : STRING")
@ -273,11 +280,16 @@ def t_string(p):
s = p[0].value[:-1]
# get the header
header, s = s.split('"', 1)
# remove unicode marker
# remove unicode marker (this is redundant because Hy string
# literals already, by default, generate Unicode literals
# under Python 2)
header = header.replace("u", "")
# remove bytes marker, since we'll need to exclude it for Python 2
is_bytestring = "b" in header
header = header.replace("b", "")
# build python string
s = header + '"""' + s + '"""'
return uni_hystring(s)
return (hybytes if is_bytestring else uni_hystring)(s)
@pg.production("string : PARTIAL_STRING")

View File

@ -19,7 +19,7 @@
# DEALINGS IN THE SOFTWARE.
from __future__ import unicode_literals
from hy._compat import PY3, str_type, long_type, string_types
from hy._compat import PY3, str_type, bytes_type, long_type, string_types
class HyObject(object):
@ -84,6 +84,16 @@ class HyString(HyObject, str_type):
_wrappers[str_type] = HyString
class HyBytes(HyObject, bytes_type):
    """
    Hy model for a sequence of bytes. It subclasses ``bytes_type``, so it
    is a ``bytes`` or a ``str`` depending on the Python version.
    """
_wrappers[bytes_type] = HyBytes
class HySymbol(HyString):
"""
Hy Symbol. Basically a String.

View File

@ -482,6 +482,15 @@ def test_ast_unicode_strings():
assert _compile_string("\xc3\xa9") == "\xc3\xa9"
def test_ast_unicode_vs_bytes():
    """Plain string literals compile to Unicode; ``b``-prefixed to bytes."""

    def compiled(source):
        # Compile a single Hy expression and return the value stored on
        # the resulting literal AST node.
        return hy_compile(tokenize(source), "__main__").body[0].value.s

    assert compiled('"hello"') == u"hello"
    assert type(compiled('"hello"')) is (str if PY3 else unicode)  # noqa
    assert compiled('b"hello"') == (eval('b"hello"') if PY3 else "hello")
    # Exact type identity, matching the Unicode check above.
    assert type(compiled('b"hello"')) is (bytes if PY3 else str)
    assert compiled('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
def test_compile_error():
"""Ensure we get compile error in tricky cases"""
try:

View File

@ -30,6 +30,9 @@
(defmacro a-string [] "foo")
(assert (= (a-string) "foo"))
;; A macro whose expansion is a bytestring literal.
(defmacro a-bytes [] b"foo")
(assert (= (a-bytes) b"foo"))
(defmacro a-list [] [1 2])
(assert (= (a-list) [1 2]))