Add bytestring literals
This commit is contained in:
parent
92d5d6b42c
commit
45b7a4ac9d
2
NEWS
2
NEWS
@ -3,6 +3,8 @@ Changes from 0.12.1
|
||||
[ Language Changes ]
|
||||
* `let` has been removed. Python's scoping rules do not make a proper
|
||||
implementation of it possible. Use `setv` instead.
|
||||
* Added bytestring literals, which create `bytes` objects under Python 3
|
||||
and `str` objects under Python 2
|
||||
* Commas and underscores are allowed in numeric literals
|
||||
* xor: If exactly one argument is true, return it
|
||||
|
||||
|
@ -52,6 +52,19 @@ digits.
|
||||
|
||||
(print 10,000,000,000 10_000_000_000)
|
||||
|
||||
string literals
|
||||
---------------
|
||||
|
||||
Unlike Python, Hy allows only double-quoted strings. The single-quote character
|
||||
is reserved for preventing the evaluation of a form, as in most Lisps.
|
||||
|
||||
Whether running under Python 2 or Python 3, Hy treats string literals as
|
||||
sequences of Unicode characters by default, and allows you to prefix a literal
|
||||
with ``b`` to treat it as a sequence of bytes. So when running under Python 3,
|
||||
Hy passes ``"foo"`` and ``b"foo"`` through to Python unchanged, but when
|
||||
running under Python 2, ``"foo"`` is translated to ``u"foo"`` and ``b"foo"`` is
|
||||
translated to ``"foo"``.
|
||||
|
||||
Built-Ins
|
||||
=========
|
||||
|
||||
|
@ -113,6 +113,12 @@ Hy literal strings can span multiple lines, and are considered by the
|
||||
parser as a single unit, respecting the Python escapes for unicode
|
||||
strings.
|
||||
|
||||
HyBytes
|
||||
~~~~~~~
|
||||
|
||||
``hy.models.HyBytes`` is like ``HyString``, but for sequences of bytes.
|
||||
It inherits from ``bytes`` on Python 3 and ``str`` on Python 2.
|
||||
|
||||
.. _hy_numeric_models:
|
||||
|
||||
Numeric Models
|
||||
|
@ -49,6 +49,11 @@ if PY3:
|
||||
else:
|
||||
str_type = unicode # NOQA
|
||||
|
||||
# bytes_type names the native byte-string type for the running interpreter:
# ``bytes`` when PY3 is true, ``str`` otherwise (i.e. Python 2).
if PY3:
|
||||
bytes_type = bytes
|
||||
else:
|
||||
bytes_type = str
|
||||
|
||||
if PY3:
|
||||
long_type = int
|
||||
else:
|
||||
|
@ -25,14 +25,15 @@
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from hy.models import (HyExpression, HyKeyword, HyInteger, HyComplex, HyString,
|
||||
HySymbol, HyFloat, HyList, HySet, HyDict, HyCons)
|
||||
HyBytes, HySymbol, HyFloat, HyList, HySet, HyDict,
|
||||
HyCons)
|
||||
from hy.errors import HyCompileError, HyTypeError
|
||||
|
||||
from hy.lex.parser import hy_symbol_mangle
|
||||
|
||||
import hy.macros
|
||||
from hy._compat import (
|
||||
str_type, long_type, PY27, PY33, PY3, PY34, PY35, raise_empty)
|
||||
str_type, bytes_type, long_type, PY27, PY33, PY3, PY34, PY35, raise_empty)
|
||||
from hy.macros import require, macroexpand, reader_macroexpand
|
||||
import hy.importer
|
||||
|
||||
@ -2641,6 +2642,13 @@ class HyASTCompiler(object):
|
||||
lineno=string.start_line,
|
||||
col_offset=string.start_column)
|
||||
|
||||
# Compile a HyBytes model into a Python AST literal node.
# The node class is ast.Bytes when PY3 is true and ast.Str otherwise; the
# payload is the model converted to the plain bytes_type, and the node's
# position is copied from the model's start_line / start_column.
@builds(HyBytes)
|
||||
def compile_bytes(self, bytestring):
|
||||
f = ast.Bytes if PY3 else ast.Str
|
||||
return f(s=bytes_type(bytestring),
|
||||
lineno=bytestring.start_line,
|
||||
col_offset=bytestring.start_column)
|
||||
|
||||
@builds(HyKeyword)
|
||||
def compile_keyword(self, keyword):
|
||||
return ast.Str(s=str_type(keyword),
|
||||
|
@ -46,7 +46,7 @@ lg.add('HASHREADER', r'#[^{]')
|
||||
# A regexp which matches incomplete strings, used to support
|
||||
# multi-line strings in the interpreter
|
||||
partial_string = r'''(?x)
|
||||
(?:u|r|ur|ru)? # prefix
|
||||
(?:u|r|ur|ru|b|br|rb)? # prefix
|
||||
" # start string
|
||||
(?:
|
||||
| [^"\\] # non-quote or backslash
|
||||
|
@ -18,13 +18,15 @@
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import sys
|
||||
from functools import wraps
|
||||
from ast import literal_eval
|
||||
|
||||
from rply import ParserGenerator
|
||||
|
||||
from hy.models import (HyComplex, HyCons, HyDict, HyExpression, HyFloat,
|
||||
HyInteger, HyKeyword, HyList, HySet, HyString, HySymbol)
|
||||
from hy._compat import PY3, str_type
|
||||
from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression,
|
||||
HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString,
|
||||
HySymbol)
|
||||
from .lexer import lexer
|
||||
from .exceptions import LexException, PrematureEndOfInput
|
||||
|
||||
@ -55,8 +57,6 @@ def hy_symbol_unmangle(p):
|
||||
# hy_symbol_mangle is one-way, so this can't be perfect.
|
||||
# But it can be useful till we have a way to get the original
|
||||
# symbol (https://github.com/hylang/hy/issues/360).
|
||||
|
||||
from hy._compat import str_type
|
||||
p = str_type(p)
|
||||
|
||||
if p.endswith("_bang") and p != "_bang":
|
||||
@ -258,12 +258,19 @@ def t_empty_list(p):
|
||||
return HyList([])
|
||||
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
if PY3:
|
||||
def uni_hystring(s):
|
||||
return HyString(eval(s))
|
||||
return HyString(literal_eval(s))
|
||||
|
||||
# PY3 variant: ``s`` is the source text of a string literal without a ``b``
# marker; prepending 'b' makes literal_eval produce a bytes value, which is
# then wrapped in HyBytes.
def hybytes(s):
|
||||
return HyBytes(literal_eval('b'+s))
|
||||
|
||||
else:
|
||||
def uni_hystring(s):
|
||||
return HyString(eval('u'+s))
|
||||
return HyString(literal_eval('u'+s))
|
||||
|
||||
# Non-PY3 variant: the literal text is evaluated as-is (no prefix added) and
# the result is wrapped in HyBytes.
# NOTE(review): presumably relies on an unprefixed literal evaluating to a
# byte ``str`` under Python 2 -- confirm.
def hybytes(s):
|
||||
return HyBytes(literal_eval(s))
|
||||
|
||||
|
||||
@pg.production("string : STRING")
|
||||
@ -273,11 +280,16 @@ def t_string(p):
|
||||
s = p[0].value[:-1]
|
||||
# get the header
|
||||
header, s = s.split('"', 1)
|
||||
# remove unicode marker
|
||||
# remove unicode marker (this is redundant because Hy string
|
||||
# literals already, by default, generate Unicode literals
|
||||
# under Python 2)
|
||||
header = header.replace("u", "")
|
||||
# remove bytes marker, since we'll need to exclude it for Python 2
|
||||
is_bytestring = "b" in header
|
||||
header = header.replace("b", "")
|
||||
# build python string
|
||||
s = header + '"""' + s + '"""'
|
||||
return uni_hystring(s)
|
||||
return (hybytes if is_bytestring else uni_hystring)(s)
|
||||
|
||||
|
||||
@pg.production("string : PARTIAL_STRING")
|
||||
|
12
hy/models.py
12
hy/models.py
@ -19,7 +19,7 @@
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from hy._compat import PY3, str_type, long_type, string_types
|
||||
from hy._compat import PY3, str_type, bytes_type, long_type, string_types
|
||||
|
||||
|
||||
class HyObject(object):
|
||||
@ -84,6 +84,16 @@ class HyString(HyObject, str_type):
|
||||
_wrappers[str_type] = HyString
|
||||
|
||||
|
||||
# Model class for byte strings: combines HyObject with bytes_type and adds
# no behaviour of its own (the body is just ``pass``).
class HyBytes(HyObject, bytes_type):
|
||||
"""
|
||||
Generic Hy Bytes object. It's either a ``bytes`` or a ``str``, depending
|
||||
on the Python version.
|
||||
"""
|
||||
pass
|
||||
|
||||
# Register HyBytes in the _wrappers table under the plain bytes_type key.
# NOTE(review): presumably this table is used to wrap raw values in their
# Hy model class -- confirm against the code that reads _wrappers.
_wrappers[bytes_type] = HyBytes
|
||||
|
||||
|
||||
class HySymbol(HyString):
|
||||
"""
|
||||
Hy Symbol. Basically a String.
|
||||
|
@ -482,6 +482,15 @@ def test_ast_unicode_strings():
|
||||
assert _compile_string("\xc3\xa9") == "\xc3\xa9"
|
||||
|
||||
|
||||
# Checks that an unprefixed string literal compiles to a Unicode string and
# a ``b``-prefixed literal compiles to a byte string, with the expected
# concrete types chosen by PY3.
def test_ast_unicode_vs_bytes():
|
||||
# Helper: tokenize and compile a source snippet, then return the ``.s``
# payload of the first statement's value node.
def f(x): return hy_compile(tokenize(x), "__main__").body[0].value.s
|
||||
assert f('"hello"') == u"hello"
|
||||
assert type(f('"hello"')) is (str if PY3 else unicode) # noqa
|
||||
assert f('b"hello"') == (eval('b"hello"') if PY3 else "hello")
|
||||
assert type(f('b"hello"')) == (bytes if PY3 else str)
|
||||
# A ``\xa0`` escape inside a bytes literal must yield the single byte 160.
assert f('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
|
||||
|
||||
|
||||
def test_compile_error():
|
||||
"""Ensure we get compile error in tricky cases"""
|
||||
try:
|
||||
|
@ -30,6 +30,9 @@
|
||||
(defmacro a-string [] "foo")
|
||||
(assert (= (a-string) "foo"))
|
||||
|
||||
(defmacro a-bytes [] b"foo")
|
||||
(assert (= (a-bytes) b"foo"))
|
||||
|
||||
(defmacro a-list [] [1 2])
|
||||
(assert (= (a-list) [1 2]))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user