Implement format strings

This commit is contained in:
Kodi Arfer 2019-02-26 14:04:24 -05:00
parent 56f51a9a20
commit 5bfc140b4d
7 changed files with 215 additions and 10 deletions

View File

@ -1,5 +1,13 @@
.. default-role:: code
Unreleased
==============================
New Features
------------------------------
* Format strings with embedded Hy code (e.g., `f"The sum is {(+ x y)}"`)
are now supported, even on Pythons earlier than 3.6.
0.16.0
==============================

View File

@ -12,14 +12,15 @@ from funcparserlib.parser import some, many, oneplus, maybe, NoParseError
from hy.errors import (HyCompileError, HyTypeError, HyLanguageError,
HySyntaxError, HyEvalError, HyInternalError)
from hy.lex import mangle, unmangle
from hy.lex import mangle, unmangle, hy_parse, parse_one_thing, LexException
from hy._compat import (string_types, str_type, bytes_type, long_type, PY3,
PY35, reraise)
PY35, PY36, reraise)
from hy.macros import require, load_macros, macroexpand, tag_macroexpand
import hy.core
import re
import pkgutil
import traceback
import importlib
@ -31,6 +32,7 @@ import copy
import __future__
from collections import defaultdict
from functools import reduce
if PY3:
import builtins
@ -629,8 +631,11 @@ class HyASTCompiler(object):
elif isinstance(form, HyKeyword):
body = [HyString(form.name)]
elif isinstance(form, HyString) and form.brackets is not None:
body.extend([HyKeyword("brackets"), form.brackets])
elif isinstance(form, HyString):
if form.is_format:
body.extend([HyKeyword("is_format"), form.is_format])
if form.brackets is not None:
body.extend([HyKeyword("brackets"), form.brackets])
ret = HyExpression([HySymbol(name)] + body).replace(form)
return imports, ret, False
@ -1798,10 +1803,112 @@ class HyASTCompiler(object):
@builds_model(HyString, HyBytes)
def compile_string(self, string):
    """Compile a `HyString` or `HyBytes` model into an AST node.

    A `HyString` flagged with `is_format` is handed to
    `_format_string`. Anything else becomes a single literal node:
    `asty.Bytes` for a `HyBytes` on Python 3, `asty.Str` otherwise.
    """
    # Exact type checks (not isinstance) mirror how the models are told
    # apart throughout this method.
    is_bytes = type(string) is HyBytes

    if type(string) is HyString and string.is_format:
        # This is a format string (a.k.a. an f-string).
        return self._format_string(string, str_type(string))

    if is_bytes:
        make_node = asty.Bytes if PY3 else asty.Str
        return make_node(string, s=bytes_type(string))

    return asty.Str(string, s=str_type(string))
def _format_string(self, string, rest, allow_recursion=True):
    """Compile the text of a format string (f-string).

    The text is split into literal chunks and replacement fields
    (`{form}`, optionally followed by `!c` and/or `:spec`). Each
    replacement field's first form is parsed with `parse_one_thing`
    and compiled with `self.compile`.

    Parameters:
        string: the model being compiled; it is passed to every
            `asty` node constructor and to `self._syntax_error`.
        rest: the text that remains to be processed.
        allow_recursion: when true, a replacement field may contain
            one further level of matched braces, and a format
            specifier is itself compiled by a recursive call (made
            with `allow_recursion=False`). When false, the
            specifier text is used as a plain string.

    Returns:
        The accumulated `Result` plus the node for the whole string:
        an `asty.JoinedStr` when `PY36` is true, otherwise a chain
        of `+` operations over `str.format` calls and literals.

    Raises:
        The exception built by `self._syntax_error` for a stray
        `}`, an unterminated or mismatched `{`, a form that cannot
        be parsed, a `!` with no conversion character, or trailing
        junk in a field.
    """
    values = []
    ret = Result()

    while True:
        # Look for the next replacement field, and get the
        # plain text before it.
        match = re.search(r'\{\{?|\}\}?', rest)
        if match:
            literal_chars = rest[: match.start()]
            if match.group() == '}':
                raise self._syntax_error(string,
                    "f-string: single '}' is not allowed")
            if match.group() in ('{{', '}}'):
                # Doubled braces just add a single brace to the text.
                literal_chars += match.group()[0]
            rest = rest[match.end() :]
        else:
            literal_chars = rest
            rest = ""
        if literal_chars:
            values.append(asty.Str(string, s = literal_chars))
        if match is None:
            # No more braces: everything has been consumed.
            break
        if match.group() != '{':
            # A doubled brace; keep scanning. (If nothing is left, the
            # next iteration finds no match and ends the loop.)
            continue

        # We are inside a replacement field. This is reached even when
        # `rest` is empty, so that a lone '{' at the very end of the
        # text is reported below instead of being silently dropped.

        # Look for the end of the replacement field, allowing
        # one more level of matched braces, but no deeper, and only
        # if we can recurse.
        field_pattern = (
            r'(?: \{ [^{}]* \} | [^{}]+ )* \}'
            if allow_recursion
            else r'[^{}]* \}')
        match = re.match(field_pattern, rest, re.VERBOSE)
        if not match:
            raise self._syntax_error(string, 'f-string: mismatched braces')
        item = rest[: match.end() - 1]
        rest = rest[match.end() :]

        # Parse the first form.
        try:
            model, item = parse_one_thing(item)
        except (ValueError, LexException) as e:
            raise self._syntax_error(string, "f-string: " + str_type(e))

        # Look for a conversion character.
        item = item.lstrip()
        conversion = None
        if item.startswith('!'):
            if len(item) < 2:
                # A bare '!' would otherwise escape as an IndexError.
                raise self._syntax_error(string,
                    "f-string: missing conversion character")
            conversion = item[1]
            item = item[2:].lstrip()

        # Look for a format specifier.
        format_spec = asty.Str(string, s = "")
        if item.startswith(':'):
            if allow_recursion:
                ret += self._format_string(string,
                    item[1:],
                    allow_recursion=False)
                format_spec = ret.force_expr
            else:
                format_spec = asty.Str(string, s=item[1:])
        elif item:
            raise self._syntax_error(string,
                "f-string: trailing junk in field")

        # Now, having finished compiling any recursively included
        # forms, we can compile the first form that we parsed.
        ret += self.compile(model)

        if PY36:
            values.append(asty.FormattedValue(
                string,
                conversion = -1 if conversion is None else ord(conversion),
                format_spec = format_spec,
                value = ret.force_expr))
        else:
            # Make an expression like:
            #    "{!r:{}}".format(value, format_spec)
            values.append(asty.Call(string,
                func = asty.Attribute(
                    string,
                    value = asty.Str(string, s =
                        '{' +
                        ('!' + conversion if conversion else '') +
                        ':{}}'),
                    attr = 'format', ctx = ast.Load()),
                args = [ret.force_expr, format_spec],
                keywords = [], starargs = None, kwargs = None))

    if PY36:
        joined = asty.JoinedStr(string, values = values)
    elif values:
        joined = reduce(
            lambda x, y:
                asty.BinOp(string, left = x, op = ast.Add(), right = y),
            values)
    else:
        # An empty format string (or empty format specifier) has no
        # pieces; `reduce` would raise TypeError on an empty sequence.
        joined = asty.Str(string, s = "")
    return ret + joined
@builds_model(HyList, HySet)
def compile_list(self, expression):
elts, ret, _ = self._compile_collect(expression)

View File

@ -38,7 +38,7 @@ lg.add('HASHOTHER', r'#%s' % identifier)
# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
partial_string = r'''(?x)
(?:u|r|ur|ru|b|br|rb)? # prefix
(?:u|r|ur|ru|b|br|rb|f|fr|rf)? # prefix
" # start string
(?:
| [^"\\] # non-quote or backslash

View File

@ -200,14 +200,22 @@ def t_empty_list(state, p):
@pg.production("string : STRING")
@set_boundaries
def t_string(state, p):
    """Build a `HyString` or `HyBytes` from a STRING token.

    The token text (`p[0].value`) is a prefixed, double-quoted string
    literal. An `f` prefix (alone, or combined with `r` as `fr`/`rf`)
    marks a format string: it is stripped before evaluation and
    recorded on the resulting `HyString` as `is_format`.

    Raises `LexException` if the literal cannot be evaluated.
    """
    s = p[0].value
    # Detect and remove any "f" prefix. Only the first "f" is removed,
    # which is always part of the prefix when the check succeeds.
    is_format = s.startswith(('f', 'rf'))
    if is_format:
        s = s.replace('f', '', 1)

    # Replace the single double quotes with triple double quotes to allow
    # embedded newlines.
    # NOTE(review): `eval` is applied to the token text; this relies on
    # the lexer only ever producing well-formed string literals here.
    try:
        s = eval(s.replace('"', '"""', 1)[:-1] + '"""')
    except SyntaxError:
        raise LexException.from_lexer(
            "Can't convert {} to a HyString".format(p[0].value),
            state, p[0])
    return (HyString(s, is_format = is_format)
            if isinstance(s, str_type)
            else HyBytes(s))
@pg.production("string : PARTIAL_STRING")
@ -222,7 +230,10 @@ bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING')
def t_bracket_string(state, p):
    """Build a `HyString` from a bracket-string token.

    `bracket_string_re` splits the token text into its delimiter and
    content. The string is a format string when the delimiter is
    exactly `f` or begins with `f-`; the delimiter itself is kept on
    the model as `brackets`.
    """
    m = bracket_string_re.match(p[0].value)
    delim, content = m.groups()
    return HyString(
        content,
        is_format = delim == 'f' or delim.startswith('f-'),
        brackets = delim)
@pg.production("identifier : IDENTIFIER")

View File

@ -94,8 +94,9 @@ class HyString(HyObject, str_type):
scripts. It's either a ``str`` or a ``unicode``, depending on the
Python version.
"""
def __new__(cls, s=None, is_format=False, brackets=None):
    """Create the string value and attach format-string metadata.

    Parameters:
        s: the text of the string, passed to the parent constructor.
        is_format: whether this is a format string (f-string); stored
            as a plain `bool`.
        brackets: the bracket-string delimiter, or `None` when the
            string was not written as a bracket string.
    """
    value = super(HyString, cls).__new__(cls, s)
    value.is_format = bool(is_format)
    value.brackets = brackets
    return value

View File

@ -10,7 +10,7 @@ from hy.compiler import hy_compile, hy_eval
from hy.errors import HyCompileError, HyLanguageError, HyError
from hy.lex import hy_parse
from hy.lex.exceptions import LexException, PrematureEndOfInput
from hy._compat import PY3
from hy._compat import PY3, PY36
import ast
import pytest
@ -511,6 +511,18 @@ def test_ast_unicode_vs_bytes():
assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
@pytest.mark.skipif(not PY36, reason='f-strings require Python 3.6+')
def test_format_string():
    """Check which format strings compile and which are rejected."""
    # Each entry pairs the checker to apply with the Hy source text;
    # the order matches the order the cases are meant to run in.
    cases = [
        (can_compile, 'f"hello world"'),
        (can_compile, 'f"hello {(+ 1 1)} world"'),
        (can_compile, 'f"hello world {(+ 1 1)}"'),
        # Unbalanced braces or parentheses must be rejected.
        (cant_compile, 'f"hello {(+ 1 1) world"'),
        (cant_compile, 'f"hello (+ 1 1)} world"'),
        (cant_compile, 'f"hello {(+ 1 1} world"'),
        # Escaped quotes inside a replacement field.
        (can_compile, r'f"hello {\"n\"} world"'),
        (can_compile, r'f"hello {\"\\n\"} world"'),
    ]
    for check, source in cases:
        assert check(source)
def test_ast_bracket_string():
assert s(r'#[[empty delims]]') == 'empty delims'
assert s(r'#[my delim[fizzle]my delim]') == 'fizzle'

View File

@ -1217,6 +1217,72 @@
(assert (none? (. '"squid" brackets))))
(defn test-format-strings []
  ; Exercise format strings (f-strings): plain text, embedded forms,
  ; conversion characters, format specifiers, brace escaping, nested
  ; replacement fields, and format bracket strings.
  (assert (= f"hello world" "hello world"))
  (assert (= f"hello {(+ 1 1)} world" "hello 2 world"))
  (assert (= f"a{ (.upper (+ \"g\" \"k\")) }z" "aGKz"))

  ; Referring to a variable
  (setv p "xyzzy")
  (assert (= f"h{p}j" "hxyzzyj"))

  ; Including a statement and setting a variable
  (assert (= f"a{(do (setv floop 4) (* floop 2))}z" "a8z"))
  (assert (= floop 4))

  ; Comments
  (assert (= f"a{(+ 1
     2 ; This is a comment.
     3)}z" "a6z"))

  ; Newlines in replacement fields
  ; (the continuation line must start in column 0, since its leading
  ; whitespace would become part of the string)
  (assert (= f"ey {\"bee
cee\"} dee" "ey bee\ncee dee"))

  ; Conversion characters and format specifiers
  ; `p:9` and `!r` are themselves legal symbol names, so whether a field
  ; names such a variable or applies a specifier/conversion depends on
  ; the whitespace inside the field.
  (setv p:9 "other")
  (setv !r "bar")
  (defn u [s]
    ; Add a "u" prefix for Python 2.
    ; The prefix makes the repr one character longer, so one space of
    ; padding (if any) is removed to keep the padded width the same.
    (if PY3
        s
        (.replace (.replace s "'" "u'" 1) "  " " " 1)))
  (assert (= f"a{p !r}" (u "a'xyzzy'")))
  (assert (= f"a{p :9}" "axyzzy    "))
  (assert (= f"a{p:9}" "aother"))
  (assert (= f"a{p !r :9}" (u "a'xyzzy'  ")))
  (assert (= f"a{p !r:9}" (u "a'xyzzy'  ")))
  (assert (= f"a{p:9 :9}" "aother    "))
  (assert (= f"a{!r}" "abar"))
  (assert (= f"a{!r !r}" (u "a'bar'")))

  ; Fun with `r`
  (assert (= f"hello {r\"\\n\"}" r"hello \n"))
  (assert (= f"hello {r\"\n\"}" "hello \n"))
    ; The `r` applies too late to avoid interpreting a backslash.

  ; Braces escaped via doubling
  (assert (= f"ab{{cde" "ab{cde"))
  (assert (= f"ab{{cde}}}}fg{{{{{{" "ab{cde}}fg{{{"))
  (assert (= f"ab{{{(+ 1 1)}}}" "ab{2}"))

  ; Nested replacement fields
  (assert (= f"{2 :{(+ 2 2)}}" "   2"))
  (setv value 12.34  width 10  precision 4)
  (assert (= f"result: {value :{width}.{precision}}" "result:      12.34"))

  ; Nested replacement fields with ! and :
  (defclass C [object]
    (defn __format__ [self format-spec]
      (+ "C[" format-spec "]")))
  (assert (= f"{(C) : {(str (+ 1 1)) !r :x<5}}" "C[ '2'xx]"))

  ; Format bracket strings
  (assert (= #[f[a{p !r :9}]f] (u "a'xyzzy'  ")))
  (assert (= #[f-string[result: {value :{width}.{precision}}]f-string]
    "result:      12.34")))
(defn test-import-syntax []
"NATIVE: test the import syntax."