commit d1aad33cbd

NEWS.rst
@@ -1,5 +1,13 @@
.. default-role:: code

Unreleased
==============================

New Features
------------------------------
* Format strings with embedded Hy code (e.g., `f"The sum is {(+ x y)}"`)
  are now supported, even on Pythons earlier than 3.6.

0.16.0
==============================

@@ -42,7 +42,8 @@ string literal called a "bracket string" similar to Lua's long brackets.
Bracket strings have customizable delimiters, like the here-documents of other
languages. A bracket string begins with ``#[FOO[`` and ends with ``]FOO]``,
where ``FOO`` is any string not containing ``[`` or ``]``, including the empty
string. For example::
string. (If ``FOO`` is exactly ``f`` or begins with ``f-``, the bracket string
is interpreted as a :ref:`format string <syntax-fstrings>`.) For example::

   => (print #[["That's very kind of yuo [sic]" Tom wrote back.]])
   "That's very kind of yuo [sic]" Tom wrote back.
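
To make the ``f`` delimiter rule above concrete, here is a small illustrative
session (drawn from this commit's test changes rather than from the
documentation text itself)::

   => (setv p "xyzzy")
   => (print #[f[a{p !r}]f])
   a'xyzzy'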

@@ -69,6 +70,43 @@ of bytes. So when running under Python 3, Hy translates ``"foo"`` and

Unlike Python, Hy only recognizes string prefixes (``r``, etc.) in lowercase.

.. _syntax-fstrings:

format strings
--------------

A format string (or "f-string", or "formatted string literal") is a string
literal with embedded code, possibly accompanied by formatting commands. Hy
f-strings work much like :ref:`Python f-strings <py:f-strings>` except that the
embedded code is in Hy rather than Python, and they're supported on all
versions of Python.

::

   => (print f"The sum is {(+ 1 1)}.")
   The sum is 2.

Since ``!`` and ``:`` are identifier characters in Hy, Hy decides where the
code in a replacement field ends, and any conversion or format specifier
begins, by parsing exactly one form. You can use ``do`` to combine several
forms into one, as usual. Whitespace may be necessary to terminate the form::

   => (setv foo "a")
   => (print f"{foo:x<5}")
   …
   NameError: name 'hyx_fooXcolonXxXlessHthan_signX5' is not defined
   => (print f"{foo :x<5}")
   axxxx

Unlike Python, whitespace is allowed between a conversion and a format
specifier.
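
For instance, both of these spellings are accepted (an illustrative session
based on this commit's tests, with ``p`` bound to ``"xyzzy"``)::

   => (setv p "xyzzy")
   => (print f"{p !r :9}")
   'xyzzy'
   => (print f"{p !r:9}")
   'xyzzy'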

Also unlike Python, comments and backslashes are allowed in replacement fields.
Hy's lexer will still process the whole format string normally, like any other
string, before any replacement fields are considered, so you may need to
backslash your backslashes, and you can't comment out a closing brace or the
string delimiter.
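
For example (an illustrative session based on this commit's tests)::

   => (print f"{(+ 1
   ... 2 ; a comment inside a replacement field
   ... 3)}")
   6
   => (print f"hello {r\"\\n\"}")
   hello \n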

.. _syntax-keywords:

keywords

hy/compiler.py
@@ -12,14 +12,15 @@ from funcparserlib.parser import some, many, oneplus, maybe, NoParseError
from hy.errors import (HyCompileError, HyTypeError, HyLanguageError,
                       HySyntaxError, HyEvalError, HyInternalError)

from hy.lex import mangle, unmangle
from hy.lex import mangle, unmangle, hy_parse, parse_one_thing, LexException

from hy._compat import (string_types, str_type, bytes_type, long_type, PY3,
                        PY35, reraise)
                        PY35, PY36, reraise)
from hy.macros import require, load_macros, macroexpand, tag_macroexpand

import hy.core

import re
import pkgutil
import traceback
import importlib

@@ -31,6 +32,7 @@ import copy
import __future__

from collections import defaultdict
from functools import reduce

if PY3:
    import builtins

@@ -629,8 +631,11 @@ class HyASTCompiler(object):
        elif isinstance(form, HyKeyword):
            body = [HyString(form.name)]

        elif isinstance(form, HyString) and form.brackets is not None:
            body.extend([HyKeyword("brackets"), form.brackets])
        elif isinstance(form, HyString):
            if form.is_format:
                body.extend([HyKeyword("is_format"), form.is_format])
            if form.brackets is not None:
                body.extend([HyKeyword("brackets"), form.brackets])

        ret = HyExpression([HySymbol(name)] + body).replace(form)
        return imports, ret, False

@@ -1798,10 +1803,112 @@

    @builds_model(HyString, HyBytes)
    def compile_string(self, string):
        if type(string) is HyString and string.is_format:
            # This is a format string (a.k.a. an f-string).
            return self._format_string(string, str_type(string))
        node = asty.Bytes if PY3 and type(string) is HyBytes else asty.Str
        f = bytes_type if type(string) is HyBytes else str_type
        return node(string, s=f(string))

    def _format_string(self, string, rest, allow_recursion=True):
        values = []
        ret = Result()

        while True:
            # Look for the next replacement field, and get the
            # plain text before it.
            match = re.search(r'\{\{?|\}\}?', rest)
            if match:
                literal_chars = rest[: match.start()]
                if match.group() == '}':
                    raise self._syntax_error(string,
                        "f-string: single '}' is not allowed")
                if match.group() in ('{{', '}}'):
                    # Doubled braces just add a single brace to the text.
                    literal_chars += match.group()[0]
                rest = rest[match.end() :]
            else:
                literal_chars = rest
                rest = ""
            if literal_chars:
                values.append(asty.Str(string, s = literal_chars))
            if not rest:
                break
            if match.group() != '{':
                continue

            # Look for the end of the replacement field, allowing
            # one more level of matched braces, but no deeper, and only
            # if we can recurse.
            match = re.match(
                r'(?: \{ [^{}]* \} | [^{}]+ )* \}'
                    if allow_recursion
                    else r'[^{}]* \}',
                rest, re.VERBOSE)
            if not match:
                raise self._syntax_error(string, 'f-string: mismatched braces')
            item = rest[: match.end() - 1]
            rest = rest[match.end() :]

            # Parse the first form.
            try:
                model, item = parse_one_thing(item)
            except (ValueError, LexException) as e:
                raise self._syntax_error(string, "f-string: " + str_type(e))

            # Look for a conversion character.
            item = item.lstrip()
            conversion = None
            if item.startswith('!'):
                conversion = item[1]
                item = item[2:].lstrip()

            # Look for a format specifier.
            format_spec = asty.Str(string, s = "")
            if item.startswith(':'):
                if allow_recursion:
                    ret += self._format_string(string,
                        item[1:],
                        allow_recursion=False)
                    format_spec = ret.force_expr
                else:
                    format_spec = asty.Str(string, s=item[1:])
            elif item:
                raise self._syntax_error(string,
                    "f-string: trailing junk in field")

            # Now, having finished compiling any recursively included
            # forms, we can compile the first form that we parsed.
            ret += self.compile(model)

            if PY36:
                values.append(asty.FormattedValue(
                    string,
                    conversion = -1 if conversion is None else ord(conversion),
                    format_spec = format_spec,
                    value = ret.force_expr))
            else:
                # Make an expression like:
                #   "{!r:{}}".format(value, format_spec)
                values.append(asty.Call(string,
                    func = asty.Attribute(
                        string,
                        value = asty.Str(string, s =
                            '{' +
                            ('!' + conversion if conversion else '') +
                            ':{}}'),
                        attr = 'format', ctx = ast.Load()),
                    args = [ret.force_expr, format_spec],
                    keywords = [], starargs = None, kwargs = None))

        return ret + (
            asty.JoinedStr(string, values = values)
            if PY36
            else reduce(
                lambda x, y:
                    asty.BinOp(string, left = x, op = ast.Add(), right = y),
                values))

    @builds_model(HyList, HySet)
    def compile_list(self, expression):
        elts, ret, _ = self._compile_collect(expression)
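
As a rough illustration of the pre-3.6 fallback built by `_format_string` above
(a sketch, not code from the commit): each replacement field becomes a
`str.format` call on a template like `"{!r:{}}"`, and the literal and field
pieces are then joined with `+`. For an f-string such as `f"a{p !r:9}"`, the
generated expression evaluates roughly like this plain Python:

    # Hypothetical illustration of the pre-3.6 fallback for f"a{p !r:9}":
    # the field turns into a "{!r:{}}".format(...) call, concatenated to
    # the literal text with ast.BinOp/Add.
    p = "xyzzy"
    result = "a" + "{!r:{}}".format(p, "9")
    assert result == "a'xyzzy'  "   # repr(p), padded to width 9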

@@ -74,6 +74,33 @@ def tokenize(source, filename=None):
        raise e


def parse_one_thing(src_string):
    """Parse the first form from the string. Return it and the
    remainder of the string."""
    import re
    from hy.lex.lexer import lexer
    from hy.lex.parser import parser
    from rply.errors import LexingError
    tokens = []
    err = None
    for token in lexer.lex(src_string):
        tokens.append(token)
        try:
            model, = parser.parse(
                iter(tokens),
                state=ParserState(src_string, filename=None))
        except (LexingError, LexException) as e:
            err = e
        else:
            return model, src_string[re.match(
                r'.+\n' * (model.end_line - 1)
                    + '.' * model.end_column,
                src_string).end():]
    if err:
        raise err
    raise ValueError("No form found")


mangle_delim = 'X'
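
A usage sketch for `parse_one_thing` (a hypothetical session, not part of the
commit): it hands the compiler the first form in a replacement field plus the
trailing text, which `_format_string` then scans for a `!` conversion or `:`
format specifier.

    from hy.lex import parse_one_thing

    model, rest = parse_one_thing("(+ 1 1) !r :x<5")
    # `model` is the HyExpression for (+ 1 1); `rest` is the leftover text,
    # here " !r :x<5".
    print(model, repr(rest))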

@@ -38,7 +38,7 @@ lg.add('HASHOTHER', r'#%s' % identifier)
# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
partial_string = r'''(?x)
    (?:u|r|ur|ru|b|br|rb)? # prefix
    (?:u|r|ur|ru|b|br|rb|f|fr|rf)? # prefix
    " # start string
    (?:
       | [^"\\]        # non-quote or backslash

@@ -31,8 +31,11 @@ def set_boundaries(fun):
            ret.end_line = end.lineno
            ret.end_column = end.colno
        else:
            ret.end_line = start.lineno
            ret.end_column = start.colno + len(p[0].value)
            v = p[0].value
            ret.end_line = start.lineno + v.count('\n')
            ret.end_column = (len(v) - v.rindex('\n') - 1
                              if '\n' in v
                              else start.colno + len(v) - 1)
        return ret
    return wrapped
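
A worked example of the new end-position computation (a standalone sketch,
derived from the test_lex changes further down rather than from the commit's
code): for a token whose text contains a newline, the end column is counted
from the character after the last newline.

    # Token text for the string literal in '"apple\nblueberry" abc',
    # starting at line 1, column 1.
    v = '"apple\nblueberry"'
    end_line = 1 + v.count('\n')                   # -> 2
    end_column = (len(v) - v.rindex('\n') - 1      # chars after the last newline
                  if '\n' in v
                  else 1 + len(v) - 1)
    assert (end_line, end_column) == (2, 10)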

@@ -197,14 +200,22 @@ def t_empty_list(state, p):
@pg.production("string : STRING")
@set_boundaries
def t_string(state, p):
    s = p[0].value
    # Detect and remove any "f" prefix.
    is_format = False
    if s.startswith('f') or s.startswith('rf'):
        is_format = True
        s = s.replace('f', '', 1)
    # Replace the single double quotes with triple double quotes to allow
    # embedded newlines.
    try:
        s = eval(p[0].value.replace('"', '"""', 1)[:-1] + '"""')
        s = eval(s.replace('"', '"""', 1)[:-1] + '"""')
    except SyntaxError:
        raise LexException.from_lexer("Can't convert {} to a HyString".format(p[0].value),
                                      state, p[0])
    return (HyString if isinstance(s, str_type) else HyBytes)(s)
    return (HyString(s, is_format = is_format)
            if isinstance(s, str_type)
            else HyBytes(s))


@pg.production("string : PARTIAL_STRING")

@@ -219,7 +230,10 @@ bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING')
def t_bracket_string(state, p):
    m = bracket_string_re.match(p[0].value)
    delim, content = m.groups()
    return HyString(content, brackets=delim)
    return HyString(
        content,
        is_format = delim == 'f' or delim.startswith('f-'),
        brackets = delim)


@pg.production("identifier : IDENTIFIER")

@@ -33,6 +33,11 @@ class HyObject(object):
    """
    Generic Hy Object model. This is helpful to inject things into all the
    Hy lexing Objects at once.

    The position properties (`start_line`, `end_line`, `start_column`,
    `end_column`) are each 1-based and inclusive. For example, a symbol
    `abc` starting at the first column would have `start_column` 1 and
    `end_column` 3.
    """
    __properties__ = ["module", "start_line", "end_line", "start_column",
                      "end_column"]

@@ -89,8 +94,9 @@ class HyString(HyObject, str_type):
    scripts. It's either a ``str`` or a ``unicode``, depending on the
    Python version.
    """
    def __new__(cls, s=None, brackets=None):
    def __new__(cls, s=None, is_format=False, brackets=None):
        value = super(HyString, cls).__new__(cls, s)
        value.is_format = bool(is_format)
        value.brackets = brackets
        return value

@@ -10,7 +10,7 @@ from hy.compiler import hy_compile, hy_eval
from hy.errors import HyCompileError, HyLanguageError, HyError
from hy.lex import hy_parse
from hy.lex.exceptions import LexException, PrematureEndOfInput
from hy._compat import PY3
from hy._compat import PY3, PY36

import ast
import pytest

@@ -511,6 +511,18 @@ def test_ast_unicode_vs_bytes():
    assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))


@pytest.mark.skipif(not PY36, reason='f-strings require Python 3.6+')
def test_format_string():
    assert can_compile('f"hello world"')
    assert can_compile('f"hello {(+ 1 1)} world"')
    assert can_compile('f"hello world {(+ 1 1)}"')
    assert cant_compile('f"hello {(+ 1 1) world"')
    assert cant_compile('f"hello (+ 1 1)} world"')
    assert cant_compile('f"hello {(+ 1 1} world"')
    assert can_compile(r'f"hello {\"n\"} world"')
    assert can_compile(r'f"hello {\"\\n\"} world"')


def test_ast_bracket_string():
    assert s(r'#[[empty delims]]') == 'empty delims'
    assert s(r'#[my delim[fizzle]my delim]') == 'fizzle'

@@ -1217,6 +1217,72 @@
  (assert (none? (. '"squid" brackets))))


(defn test-format-strings []
  (assert (= f"hello world" "hello world"))
  (assert (= f"hello {(+ 1 1)} world" "hello 2 world"))
  (assert (= f"a{ (.upper (+ \"g\" \"k\")) }z" "aGKz"))

  ; Referring to a variable
  (setv p "xyzzy")
  (assert (= f"h{p}j" "hxyzzyj"))

  ; Including a statement and setting a variable
  (assert (= f"a{(do (setv floop 4) (* floop 2))}z" "a8z"))
  (assert (= floop 4))

  ; Comments
  (assert (= f"a{(+ 1
     2 ; This is a comment.
     3)}z" "a6z"))

  ; Newlines in replacement fields
  (assert (= f"ey {\"bee
cee\"} dee" "ey bee\ncee dee"))

  ; Conversion characters and format specifiers
  (setv p:9 "other")
  (setv !r "bar")
  (defn u [s]
    ; Add a "u" prefix for Python 2.
    (if PY3
      s
      (.replace (.replace s "'" "u'" 1) "  " " " 1)))
  (assert (= f"a{p !r}" (u "a'xyzzy'")))
  (assert (= f"a{p :9}" "axyzzy    "))
  (assert (= f"a{p:9}" "aother"))
  (assert (= f"a{p !r :9}" (u "a'xyzzy'  ")))
  (assert (= f"a{p !r:9}" (u "a'xyzzy'  ")))
  (assert (= f"a{p:9 :9}" "aother    "))
  (assert (= f"a{!r}" "abar"))
  (assert (= f"a{!r !r}" (u "a'bar'")))

  ; Fun with `r`
  (assert (= f"hello {r\"\\n\"}" r"hello \n"))
  (assert (= f"hello {r\"\n\"}" "hello \n"))
  ; The `r` applies too late to avoid interpreting a backslash.

  ; Braces escaped via doubling
  (assert (= f"ab{{cde" "ab{cde"))
  (assert (= f"ab{{cde}}}}fg{{{{{{" "ab{cde}}fg{{{"))
  (assert (= f"ab{{{(+ 1 1)}}}" "ab{2}"))

  ; Nested replacement fields
  (assert (= f"{2 :{(+ 2 2)}}" "   2"))
  (setv value 12.34 width 10 precision 4)
  (assert (= f"result: {value :{width}.{precision}}" "result:      12.34"))

  ; Nested replacement fields with ! and :
  (defclass C [object]
    (defn __format__ [self format-spec]
      (+ "C[" format-spec "]")))
  (assert (= f"{(C) : {(str (+ 1 1)) !r :x<5}}" "C[ '2'xx]"))

  ; Format bracket strings
  (assert (= #[f[a{p !r :9}]f] (u "a'xyzzy'  ")))
  (assert (= #[f-string[result: {value :{width}.{precision}}]f-string]
             "result:      12.34")))


(defn test-import-syntax []
  "NATIVE: test the import syntax."

@@ -240,22 +240,38 @@ def test_lex_bad_attrs():
    with lexe(): tokenize(":hello.foo")


def test_lex_line_counting():
    """ Make sure we can count lines / columns """
def test_lex_column_counting():
    entry = tokenize("(foo (one two))")[0]

    assert entry.start_line == 1
    assert entry.start_column == 1

    assert entry.end_line == 1
    assert entry.end_column == 15

    entry = entry[1]
    assert entry.start_line == 1
    assert entry.start_column == 6
    symbol = entry[0]
    assert symbol.start_line == 1
    assert symbol.start_column == 2
    assert symbol.end_line == 1
    assert symbol.end_column == 4

    assert entry.end_line == 1
    assert entry.end_column == 14
    inner_expr = entry[1]
    assert inner_expr.start_line == 1
    assert inner_expr.start_column == 6
    assert inner_expr.end_line == 1
    assert inner_expr.end_column == 14


def test_lex_column_counting_with_literal_newline():
    string, symbol = tokenize('"apple\nblueberry" abc')

    assert string.start_line == 1
    assert string.start_column == 1
    assert string.end_line == 2
    assert string.end_column == 10

    assert symbol.start_line == 2
    assert symbol.start_column == 12
    assert symbol.end_line == 2
    assert symbol.end_column == 14


def test_lex_line_counting_multi():