Merge pull request #1749 from Kodiologist/fstrings

Format strings
This commit is contained in:
Kodi Arfer 2019-03-04 14:05:07 -05:00 committed by GitHub
commit d1aad33cbd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 316 additions and 22 deletions

View File

@ -1,5 +1,13 @@
.. default-role:: code .. default-role:: code
Unreleased
==============================
New Features
------------------------------
* Format strings with embedded Hy code (e.g., `f"The sum is {(+ x y)}"`)
are now supported, even on Pythons earlier than 3.6.
0.16.0 0.16.0
============================== ==============================

View File

@ -42,7 +42,8 @@ string literal called a "bracket string" similar to Lua's long brackets.
Bracket strings have customizable delimiters, like the here-documents of other Bracket strings have customizable delimiters, like the here-documents of other
languages. A bracket string begins with ``#[FOO[`` and ends with ``]FOO]``, languages. A bracket string begins with ``#[FOO[`` and ends with ``]FOO]``,
where ``FOO`` is any string not containing ``[`` or ``]``, including the empty where ``FOO`` is any string not containing ``[`` or ``]``, including the empty
string. For example:: string. (If ``FOO`` is exactly ``f`` or begins with ``f-``, the bracket string
is interpreted as a :ref:`format string <syntax-fstrings>`.) For example::
=> (print #[["That's very kind of yuo [sic]" Tom wrote back.]]) => (print #[["That's very kind of yuo [sic]" Tom wrote back.]])
"That's very kind of yuo [sic]" Tom wrote back. "That's very kind of yuo [sic]" Tom wrote back.
@ -69,6 +70,43 @@ of bytes. So when running under Python 3, Hy translates ``"foo"`` and
Unlike Python, Hy only recognizes string prefixes (``r``, etc.) in lowercase. Unlike Python, Hy only recognizes string prefixes (``r``, etc.) in lowercase.
.. _syntax-fstrings:
format strings
--------------
A format string (or "f-string", or "formatted string literal") is a string
literal with embedded code, possibly accompanied by formatting commands. Hy
f-strings work much like :ref:`Python f-strings <py:f-strings>` except that the
embedded code is in Hy rather than Python, and they're supported on all
versions of Python.
::
=> (print f"The sum is {(+ 1 1)}.")
The sum is 2.
Since ``!`` and ``:`` are identifier characters in Hy, Hy decides where the
code in a replacement field ends, and any conversion or format specifier
begins, by parsing exactly one form. You can use ``do`` to combine several
forms into one, as usual. Whitespace may be necessary to terminate the form::
=> (setv foo "a")
=> (print f"{foo:x<5}")
NameError: name 'hyx_fooXcolonXxXlessHthan_signX5' is not defined
=> (print f"{foo :x<5}")
axxxx
Unlike Python, whitespace is allowed between a conversion and a format
specifier.
Also unlike Python, comments and backslashes are allowed in replacement fields.
However, Hy's lexer still processes the whole format string normally, like any
other string, before any replacement fields are considered. As a result, you
may need to backslash your backslashes, and you can't comment out a closing
brace or the string delimiter.
.. _syntax-keywords: .. _syntax-keywords:
keywords keywords

View File

@ -12,14 +12,15 @@ from funcparserlib.parser import some, many, oneplus, maybe, NoParseError
from hy.errors import (HyCompileError, HyTypeError, HyLanguageError, from hy.errors import (HyCompileError, HyTypeError, HyLanguageError,
HySyntaxError, HyEvalError, HyInternalError) HySyntaxError, HyEvalError, HyInternalError)
from hy.lex import mangle, unmangle from hy.lex import mangle, unmangle, hy_parse, parse_one_thing, LexException
from hy._compat import (string_types, str_type, bytes_type, long_type, PY3, from hy._compat import (string_types, str_type, bytes_type, long_type, PY3,
PY35, reraise) PY35, PY36, reraise)
from hy.macros import require, load_macros, macroexpand, tag_macroexpand from hy.macros import require, load_macros, macroexpand, tag_macroexpand
import hy.core import hy.core
import re
import pkgutil import pkgutil
import traceback import traceback
import importlib import importlib
@ -31,6 +32,7 @@ import copy
import __future__ import __future__
from collections import defaultdict from collections import defaultdict
from functools import reduce
if PY3: if PY3:
import builtins import builtins
@ -629,8 +631,11 @@ class HyASTCompiler(object):
elif isinstance(form, HyKeyword): elif isinstance(form, HyKeyword):
body = [HyString(form.name)] body = [HyString(form.name)]
elif isinstance(form, HyString) and form.brackets is not None: elif isinstance(form, HyString):
body.extend([HyKeyword("brackets"), form.brackets]) if form.is_format:
body.extend([HyKeyword("is_format"), form.is_format])
if form.brackets is not None:
body.extend([HyKeyword("brackets"), form.brackets])
ret = HyExpression([HySymbol(name)] + body).replace(form) ret = HyExpression([HySymbol(name)] + body).replace(form)
return imports, ret, False return imports, ret, False
@ -1798,10 +1803,112 @@ class HyASTCompiler(object):
@builds_model(HyString, HyBytes) @builds_model(HyString, HyBytes)
def compile_string(self, string): def compile_string(self, string):
if type(string) is HyString and string.is_format:
# This is a format string (a.k.a. an f-string).
return self._format_string(string, str_type(string))
node = asty.Bytes if PY3 and type(string) is HyBytes else asty.Str node = asty.Bytes if PY3 and type(string) is HyBytes else asty.Str
f = bytes_type if type(string) is HyBytes else str_type f = bytes_type if type(string) is HyBytes else str_type
return node(string, s=f(string)) return node(string, s=f(string))
def _format_string(self, string, rest, allow_recursion=True):
    """Compile the text of a format string into an expression.

    The text is split into literal chunks and replacement fields
    (`{form !conversion :format_spec}`). Each field's leading form is
    parsed with `parse_one_thing` and compiled with `self.compile`.

    Parameters:
      string: the model the format string came from. It is passed as the
        first argument to every `asty` constructor and to
        `self._syntax_error` (presumably to supply source positions;
        confirm against those helpers).
      rest: the text that remains to be processed.
      allow_recursion: whether replacement fields may appear inside a
        format specifier. The recursive call made for a format specifier
        passes False, so fields nest at most one level deep.

    Returns the accumulated `Result` plus the final expression: a
    `JoinedStr` node when `PY36` is true, otherwise a chain of `+`
    operations over the pieces.

    Raises whatever `self._syntax_error` builds when a brace is
    unbalanced, the leading form can't be parsed, a `!` isn't followed
    by a conversion character, or text remains after the form that is
    neither a conversion nor a format specifier.
    """
    values = []
    ret = Result()

    while True:
        # Look for the next replacement field, and get the
        # plain text before it.
        match = re.search(r'\{\{?|\}\}?', rest)
        if match:
            literal_chars = rest[: match.start()]
            if match.group() == '}':
                raise self._syntax_error(string,
                    "f-string: single '}' is not allowed")
            if match.group() in ('{{', '}}'):
                # Doubled braces just add a single brace to the text.
                literal_chars += match.group()[0]
            rest = rest[match.end() :]
        else:
            literal_chars = rest
            rest = ""
        if literal_chars:
            values.append(asty.Str(string, s = literal_chars))
        if match is None:
            # No braces remain, so all the text has been consumed.
            break
        if match.group() != '{':
            continue
        # Note that we deliberately don't stop here when `rest` is empty:
        # a `{` as the very last character is an unterminated field and
        # must be reported by the brace matching below, not ignored.

        # Look for the end of the replacement field, allowing
        # one more level of matched braces, but no deeper, and only
        # if we can recurse.
        match = re.match(
            r'(?: \{ [^{}]* \} | [^{}]+ )* \}'
                if allow_recursion
                else r'[^{}]* \}',
            rest, re.VERBOSE)
        if not match:
            raise self._syntax_error(string, 'f-string: mismatched braces')
        item = rest[: match.end() - 1]
        rest = rest[match.end() :]

        # Parse the first form.
        try:
            model, item = parse_one_thing(item)
        except (ValueError, LexException) as e:
            raise self._syntax_error(string, "f-string: " + str_type(e))

        # Look for a conversion character.
        item = item.lstrip()
        conversion = None
        if item.startswith('!'):
            if len(item) < 2:
                # A bare `!` would otherwise surface as an IndexError.
                raise self._syntax_error(string,
                    "f-string: expected a conversion character after '!'")
            conversion = item[1]
            item = item[2:].lstrip()

        # Look for a format specifier.
        format_spec = asty.Str(string, s = "")
        if item.startswith(':'):
            if allow_recursion:
                ret += self._format_string(string,
                    item[1:],
                    allow_recursion=False)
                format_spec = ret.force_expr
            else:
                format_spec = asty.Str(string, s=item[1:])
        elif item:
            raise self._syntax_error(string,
                "f-string: trailing junk in field")

        # Now, having finished compiling any recursively included
        # forms, we can compile the first form that we parsed.
        ret += self.compile(model)

        if PY36:
            values.append(asty.FormattedValue(
                string,
                conversion = -1 if conversion is None else ord(conversion),
                format_spec = format_spec,
                value = ret.force_expr))
        else:
            # Make an expression like:
            #    "{!r:{}}".format(value, format_spec)
            values.append(asty.Call(string,
                func = asty.Attribute(
                    string,
                    value = asty.Str(string, s =
                        '{' +
                        ('!' + conversion if conversion else '') +
                        ':{}}'),
                    attr = 'format', ctx = ast.Load()),
                args = [ret.force_expr, format_spec],
                keywords = [], starargs = None, kwargs = None))

    if PY36:
        joined = asty.JoinedStr(string, values = values)
    elif values:
        joined = reduce(
            lambda x, y:
                asty.BinOp(string, left = x, op = ast.Add(), right = y),
            values)
    else:
        # `reduce` has no initial value, so an empty format string (or
        # an empty format specifier) needs an explicit empty literal.
        joined = asty.Str(string, s = "")
    return ret + joined
@builds_model(HyList, HySet) @builds_model(HyList, HySet)
def compile_list(self, expression): def compile_list(self, expression):
elts, ret, _ = self._compile_collect(expression) elts, ret, _ = self._compile_collect(expression)

View File

@ -74,6 +74,33 @@ def tokenize(source, filename=None):
raise e raise e
def parse_one_thing(src_string):
    """Parse the first form from the string. Return it and the
    remainder of the string.

    Tokens are fed to the parser one more at a time until some prefix of
    the token stream parses as exactly one form. The remainder is
    everything in `src_string` after the position given by that form's
    `end_line` and `end_column`.

    Raises the last `LexingError` or `LexException` seen if no prefix
    ever parses, or `ValueError` if there were no parse errors at all
    (e.g., the string has no tokens)."""
    import re
    from hy.lex.lexer import lexer
    from hy.lex.parser import parser
    from rply.errors import LexingError
    tokens = []
    err = None
    for token in lexer.lex(src_string):
        tokens.append(token)
        try:
            model, = parser.parse(
                iter(tokens),
                state=ParserState(src_string, filename=None))
        except (LexingError, LexException) as e:
            # Not a complete form yet; remember why and try again with
            # one more token.
            err = e
        else:
            # Convert the form's (line, column) end position into a
            # character offset. Use `.*` rather than `.+` for the lines
            # before the last one, because any of them may be empty
            # (e.g., a blank line before or inside the form).
            return model, src_string[re.match(
                r'.*\n' * (model.end_line - 1)
                    + '.' * model.end_column,
                src_string).end():]
    if err:
        raise err
    raise ValueError("No form found")
mangle_delim = 'X' mangle_delim = 'X'

View File

@ -38,7 +38,7 @@ lg.add('HASHOTHER', r'#%s' % identifier)
# A regexp which matches incomplete strings, used to support # A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter # multi-line strings in the interpreter
partial_string = r'''(?x) partial_string = r'''(?x)
(?:u|r|ur|ru|b|br|rb)? # prefix (?:u|r|ur|ru|b|br|rb|f|fr|rf)? # prefix
" # start string " # start string
(?: (?:
| [^"\\] # non-quote or backslash | [^"\\] # non-quote or backslash

View File

@ -31,8 +31,11 @@ def set_boundaries(fun):
ret.end_line = end.lineno ret.end_line = end.lineno
ret.end_column = end.colno ret.end_column = end.colno
else: else:
ret.end_line = start.lineno v = p[0].value
ret.end_column = start.colno + len(p[0].value) ret.end_line = start.lineno + v.count('\n')
ret.end_column = (len(v) - v.rindex('\n') - 1
if '\n' in v
else start.colno + len(v) - 1)
return ret return ret
return wrapped return wrapped
@ -197,14 +200,22 @@ def t_empty_list(state, p):
@pg.production("string : STRING") @pg.production("string : STRING")
@set_boundaries @set_boundaries
def t_string(state, p): def t_string(state, p):
s = p[0].value
# Detect and remove any "f" prefix.
is_format = False
if s.startswith('f') or s.startswith('rf'):
is_format = True
s = s.replace('f', '', 1)
# Replace the single double quotes with triple double quotes to allow # Replace the single double quotes with triple double quotes to allow
# embedded newlines. # embedded newlines.
try: try:
s = eval(p[0].value.replace('"', '"""', 1)[:-1] + '"""') s = eval(s.replace('"', '"""', 1)[:-1] + '"""')
except SyntaxError: except SyntaxError:
raise LexException.from_lexer("Can't convert {} to a HyString".format(p[0].value), raise LexException.from_lexer("Can't convert {} to a HyString".format(p[0].value),
state, p[0]) state, p[0])
return (HyString if isinstance(s, str_type) else HyBytes)(s) return (HyString(s, is_format = is_format)
if isinstance(s, str_type)
else HyBytes(s))
@pg.production("string : PARTIAL_STRING") @pg.production("string : PARTIAL_STRING")
@ -219,7 +230,10 @@ bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING')
def t_bracket_string(state, p): def t_bracket_string(state, p):
m = bracket_string_re.match(p[0].value) m = bracket_string_re.match(p[0].value)
delim, content = m.groups() delim, content = m.groups()
return HyString(content, brackets=delim) return HyString(
content,
is_format = delim == 'f' or delim.startswith('f-'),
brackets = delim)
@pg.production("identifier : IDENTIFIER") @pg.production("identifier : IDENTIFIER")

View File

@ -33,6 +33,11 @@ class HyObject(object):
""" """
Generic Hy Object model. This is helpful to inject things into all the Generic Hy Object model. This is helpful to inject things into all the
Hy lexing Objects at once. Hy lexing Objects at once.
The position properties (`start_line`, `end_line`, `start_column`,
`end_column`) are each 1-based and inclusive. For example, a symbol
`abc` starting at the first column would have `start_column` 1 and
`end_column` 3.
""" """
__properties__ = ["module", "start_line", "end_line", "start_column", __properties__ = ["module", "start_line", "end_line", "start_column",
"end_column"] "end_column"]
@ -89,8 +94,9 @@ class HyString(HyObject, str_type):
scripts. It's either a ``str`` or a ``unicode``, depending on the scripts. It's either a ``str`` or a ``unicode``, depending on the
Python version. Python version.
""" """
def __new__(cls, s=None, brackets=None): def __new__(cls, s=None, is_format=False, brackets=None):
value = super(HyString, cls).__new__(cls, s) value = super(HyString, cls).__new__(cls, s)
value.is_format = bool(is_format)
value.brackets = brackets value.brackets = brackets
return value return value

View File

@ -10,7 +10,7 @@ from hy.compiler import hy_compile, hy_eval
from hy.errors import HyCompileError, HyLanguageError, HyError from hy.errors import HyCompileError, HyLanguageError, HyError
from hy.lex import hy_parse from hy.lex import hy_parse
from hy.lex.exceptions import LexException, PrematureEndOfInput from hy.lex.exceptions import LexException, PrematureEndOfInput
from hy._compat import PY3 from hy._compat import PY3, PY36
import ast import ast
import pytest import pytest
@ -511,6 +511,18 @@ def test_ast_unicode_vs_bytes():
assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160)) assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
@pytest.mark.skipif(not PY36, reason='f-strings require Python 3.6+')
def test_format_string():
    """Check which format strings compile and which are rejected."""
    # Each entry pairs a checker with the source it is applied to, in the
    # order the checks should run.
    cases = (
        (can_compile, 'f"hello world"'),
        (can_compile, 'f"hello {(+ 1 1)} world"'),
        (can_compile, 'f"hello world {(+ 1 1)}"'),
        # Unbalanced braces or parentheses must be rejected.
        (cant_compile, 'f"hello {(+ 1 1) world"'),
        (cant_compile, 'f"hello (+ 1 1)} world"'),
        (cant_compile, 'f"hello {(+ 1 1} world"'),
        # Escaped quotes and backslashes inside a replacement field.
        (can_compile, r'f"hello {\"n\"} world"'),
        (can_compile, r'f"hello {\"\\n\"} world"'),
    )
    for check, source in cases:
        assert check(source)
def test_ast_bracket_string(): def test_ast_bracket_string():
assert s(r'#[[empty delims]]') == 'empty delims' assert s(r'#[[empty delims]]') == 'empty delims'
assert s(r'#[my delim[fizzle]my delim]') == 'fizzle' assert s(r'#[my delim[fizzle]my delim]') == 'fizzle'

View File

@ -1217,6 +1217,72 @@
(assert (none? (. '"squid" brackets)))) (assert (none? (. '"squid" brackets))))
(defn test-format-strings []
; Exercise format strings: plain text, replacement fields holding
; arbitrary Hy forms, conversions, format specifiers, escaped braces,
; nested fields, and the bracket-string spelling.
(assert (= f"hello world" "hello world"))
(assert (= f"hello {(+ 1 1)} world" "hello 2 world"))
(assert (= f"a{ (.upper (+ \"g\" \"k\")) }z" "aGKz"))
; Referring to a variable
(setv p "xyzzy")
(assert (= f"h{p}j" "hxyzzyj"))
; Including a statement and setting a variable
(assert (= f"a{(do (setv floop 4) (* floop 2))}z" "a8z"))
(assert (= floop 4))
; Comments
(assert (= f"a{(+ 1
2 ; This is a comment.
3)}z" "a6z"))
; Newlines in replacement fields
(assert (= f"ey {\"bee
cee\"} dee" "ey bee\ncee dee"))
; Conversion characters and format specifiers
; `p:9` and `!r` are ordinary identifiers here, so the assertions below
; can tell a variable reference apart from a conversion or a format
; specifier, which must be separated from the form by whitespace.
(setv p:9 "other")
(setv !r "bar")
(defn u [s]
; Add a "u" prefix for Python 2.
(if PY3
s
(.replace (.replace s "'" "u'" 1) " " " " 1)))
(assert (= f"a{p !r}" (u "a'xyzzy'")))
(assert (= f"a{p :9}" "axyzzy "))
(assert (= f"a{p:9}" "aother"))
(assert (= f"a{p !r :9}" (u "a'xyzzy' ")))
(assert (= f"a{p !r:9}" (u "a'xyzzy' ")))
(assert (= f"a{p:9 :9}" "aother "))
(assert (= f"a{!r}" "abar"))
(assert (= f"a{!r !r}" (u "a'bar'")))
; Fun with `r`
(assert (= f"hello {r\"\\n\"}" r"hello \n"))
(assert (= f"hello {r\"\n\"}" "hello \n"))
; The `r` applies too late to avoid interpreting a backslash.
; (This remark describes the assertion just above it.)
; Braces escaped via doubling
(assert (= f"ab{{cde" "ab{cde"))
(assert (= f"ab{{cde}}}}fg{{{{{{" "ab{cde}}fg{{{"))
(assert (= f"ab{{{(+ 1 1)}}}" "ab{2}"))
; Nested replacement fields
(assert (= f"{2 :{(+ 2 2)}}" " 2"))
(setv value 12.34 width 10 precision 4)
(assert (= f"result: {value :{width}.{precision}}" "result: 12.34"))
; Nested replacement fields with ! and :
(defclass C [object]
(defn __format__ [self format-spec]
(+ "C[" format-spec "]")))
(assert (= f"{(C) : {(str (+ 1 1)) !r :x<5}}" "C[ '2'xx]"))
; Format bracket strings
; A bracket string is a format string when its delimiter is exactly `f`
; or begins with `f-`.
(assert (= #[f[a{p !r :9}]f] (u "a'xyzzy' ")))
(assert (= #[f-string[result: {value :{width}.{precision}}]f-string]
"result: 12.34")))
(defn test-import-syntax [] (defn test-import-syntax []
"NATIVE: test the import syntax." "NATIVE: test the import syntax."

View File

@ -240,22 +240,38 @@ def test_lex_bad_attrs():
with lexe(): tokenize(":hello.foo") with lexe(): tokenize(":hello.foo")
def test_lex_line_counting(): def test_lex_column_counting():
""" Make sure we can count lines / columns """
entry = tokenize("(foo (one two))")[0] entry = tokenize("(foo (one two))")[0]
assert entry.start_line == 1 assert entry.start_line == 1
assert entry.start_column == 1 assert entry.start_column == 1
assert entry.end_line == 1 assert entry.end_line == 1
assert entry.end_column == 15 assert entry.end_column == 15
entry = entry[1] symbol = entry[0]
assert entry.start_line == 1 assert symbol.start_line == 1
assert entry.start_column == 6 assert symbol.start_column == 2
assert symbol.end_line == 1
assert symbol.end_column == 4
assert entry.end_line == 1 inner_expr = entry[1]
assert entry.end_column == 14 assert inner_expr.start_line == 1
assert inner_expr.start_column == 6
assert inner_expr.end_line == 1
assert inner_expr.end_column == 14
def test_lex_column_counting_with_literal_newline():
    """A string literal containing a newline must end on the second line,
    and the symbol after it must be positioned on that line too."""
    string, symbol = tokenize('"apple\nblueberry" abc')

    def span(model):
        # (start_line, start_column, end_line, end_column)
        return (model.start_line, model.start_column,
                model.end_line, model.end_column)

    assert span(string) == (1, 1, 2, 10)
    assert span(symbol) == (2, 12, 2, 14)
def test_lex_line_counting_multi(): def test_lex_line_counting_multi():