Implement format strings

This commit is contained in:
Kodi Arfer 2019-02-26 14:04:24 -05:00
parent 56f51a9a20
commit 5bfc140b4d
7 changed files with 215 additions and 10 deletions

View File

@ -1,5 +1,13 @@
.. default-role:: code .. default-role:: code
Unreleased
==============================
New Features
------------------------------
* Format strings with embedded Hy code (e.g., `f"The sum is {(+ x y)}"`)
are now supported, even on Pythons earlier than 3.6.
0.16.0 0.16.0
============================== ==============================

View File

@ -12,14 +12,15 @@ from funcparserlib.parser import some, many, oneplus, maybe, NoParseError
from hy.errors import (HyCompileError, HyTypeError, HyLanguageError, from hy.errors import (HyCompileError, HyTypeError, HyLanguageError,
HySyntaxError, HyEvalError, HyInternalError) HySyntaxError, HyEvalError, HyInternalError)
from hy.lex import mangle, unmangle from hy.lex import mangle, unmangle, hy_parse, parse_one_thing, LexException
from hy._compat import (string_types, str_type, bytes_type, long_type, PY3, from hy._compat import (string_types, str_type, bytes_type, long_type, PY3,
PY35, reraise) PY35, PY36, reraise)
from hy.macros import require, load_macros, macroexpand, tag_macroexpand from hy.macros import require, load_macros, macroexpand, tag_macroexpand
import hy.core import hy.core
import re
import pkgutil import pkgutil
import traceback import traceback
import importlib import importlib
@ -31,6 +32,7 @@ import copy
import __future__ import __future__
from collections import defaultdict from collections import defaultdict
from functools import reduce
if PY3: if PY3:
import builtins import builtins
@ -629,8 +631,11 @@ class HyASTCompiler(object):
elif isinstance(form, HyKeyword): elif isinstance(form, HyKeyword):
body = [HyString(form.name)] body = [HyString(form.name)]
elif isinstance(form, HyString) and form.brackets is not None: elif isinstance(form, HyString):
body.extend([HyKeyword("brackets"), form.brackets]) if form.is_format:
body.extend([HyKeyword("is_format"), form.is_format])
if form.brackets is not None:
body.extend([HyKeyword("brackets"), form.brackets])
ret = HyExpression([HySymbol(name)] + body).replace(form) ret = HyExpression([HySymbol(name)] + body).replace(form)
return imports, ret, False return imports, ret, False
@ -1798,10 +1803,112 @@ class HyASTCompiler(object):
@builds_model(HyString, HyBytes) @builds_model(HyString, HyBytes)
def compile_string(self, string): def compile_string(self, string):
if type(string) is HyString and string.is_format:
# This is a format string (a.k.a. an f-string).
return self._format_string(string, str_type(string))
node = asty.Bytes if PY3 and type(string) is HyBytes else asty.Str node = asty.Bytes if PY3 and type(string) is HyBytes else asty.Str
f = bytes_type if type(string) is HyBytes else str_type f = bytes_type if type(string) is HyBytes else str_type
return node(string, s=f(string)) return node(string, s=f(string))
def _format_string(self, string, rest, allow_recursion=True):
    """Compile the text of a format string into a single expression.

    The text is scanned left to right.  Literal runs become `Str`
    nodes; `{{` and `}}` contribute a single literal brace; each
    `{...}` replacement field is parsed with `parse_one_thing`,
    optionally followed by a `!` conversion character and a `:` format
    specifier, and compiled.

    Parameters:
      string -- the model the text came from; it is passed to every
          `asty` call and to `self._syntax_error` (presumably to supply
          source positions -- confirm against those helpers).
      rest -- the text still to be processed.
      allow_recursion -- when true, a replacement field may contain one
          nested level of braces and its format specifier is itself
          compiled as a format string (with recursion disabled).  When
          false, fields may not contain braces and the format specifier
          is taken literally.

    Returns the accumulated `Result` plus either a `JoinedStr` node
    (when `PY36` is true) or a chain of `+` operations over the pieces.

    Raises whatever `self._syntax_error` builds for: a lone `}`,
    unbalanced braces, a form that fails to parse, a missing or invalid
    conversion character, or trailing text in a field.
    """
    values = []
    ret = Result()

    while True:
        # Look for the next replacement field, and get the
        # plain text before it.
        match = re.search(r'\{\{?|\}\}?', rest)
        if match:
            literal_chars = rest[:match.start()]
            if match.group() == '}':
                raise self._syntax_error(
                    string, "f-string: single '}' is not allowed")
            if match.group() in ('{{', '}}'):
                # Doubled braces just add a single brace to the text.
                literal_chars += match.group()[0]
            rest = rest[match.end():]
        else:
            literal_chars = rest
            rest = ""
        if literal_chars:
            values.append(asty.Str(string, s=literal_chars))
        if match is None:
            # No more braces: everything has been consumed.
            break
        if match.group() != '{':
            continue
        # NOTE: we deliberately do not stop here when `rest` is empty.
        # A '{' as the very last character opens a field that is never
        # closed, and must be reported below rather than dropped.

        # Look for the end of the replacement field, allowing
        # one more level of matched braces, but no deeper, and only
        # if we can recurse.  The pattern is written in "unrolled"
        # form so that an unterminated field fails in linear time
        # instead of backtracking exponentially.
        match = re.match(
            r'[^{}]* (?: \{ [^{}]* \} [^{}]* )* \}'
            if allow_recursion
            else r'[^{}]* \}',
            rest, re.VERBOSE)
        if not match:
            raise self._syntax_error(string, 'f-string: mismatched braces')
        item = rest[:match.end() - 1]
        rest = rest[match.end():]

        # Parse the first form.
        try:
            model, item = parse_one_thing(item)
        except (ValueError, LexException) as e:
            raise self._syntax_error(string, "f-string: " + str_type(e))

        # Look for a conversion character.
        item = item.lstrip()
        conversion = None
        if item.startswith('!'):
            if len(item) < 2:
                # A bare '!' at the end of the field would otherwise
                # surface as an IndexError.
                raise self._syntax_error(
                    string, "f-string: missing conversion character")
            conversion = item[1]
            if conversion not in ('s', 'r', 'a'):
                raise self._syntax_error(
                    string,
                    "f-string: invalid conversion character: "
                    "expected 's', 'r', or 'a'")
            item = item[2:].lstrip()

        # Look for a format specifier.
        format_spec = asty.Str(string, s="")
        if item.startswith(':'):
            if allow_recursion:
                ret += self._format_string(string,
                                           item[1:],
                                           allow_recursion=False)
                format_spec = ret.force_expr
            else:
                format_spec = asty.Str(string, s=item[1:])
        elif item:
            raise self._syntax_error(string,
                                     "f-string: trailing junk in field")

        # Now, having finished compiling any recursively included
        # forms, we can compile the first form that we parsed.
        ret += self.compile(model)

        if PY36:
            values.append(asty.FormattedValue(
                string,
                conversion=-1 if conversion is None else ord(conversion),
                format_spec=format_spec,
                value=ret.force_expr))
        else:
            # Make an expression like:
            #    "{!r:{}}".format(value, format_spec)
            values.append(asty.Call(
                string,
                func=asty.Attribute(
                    string,
                    value=asty.Str(
                        string,
                        s='{' +
                          ('!' + conversion if conversion else '') +
                          ':{}}'),
                    attr='format', ctx=ast.Load()),
                args=[ret.force_expr, format_spec],
                keywords=[], starargs=None, kwargs=None))

    if PY36:
        joined = asty.JoinedStr(string, values=values)
    elif values:
        joined = reduce(
            lambda x, y:
                asty.BinOp(string, left=x, op=ast.Add(), right=y),
            values)
    else:
        # `reduce` has no initial value, so an empty format string (or
        # an empty format specifier) needs an explicit empty literal.
        joined = asty.Str(string, s="")
    return ret + joined
@builds_model(HyList, HySet) @builds_model(HyList, HySet)
def compile_list(self, expression): def compile_list(self, expression):
elts, ret, _ = self._compile_collect(expression) elts, ret, _ = self._compile_collect(expression)

View File

@ -38,7 +38,7 @@ lg.add('HASHOTHER', r'#%s' % identifier)
# A regexp which matches incomplete strings, used to support # A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter # multi-line strings in the interpreter
partial_string = r'''(?x) partial_string = r'''(?x)
(?:u|r|ur|ru|b|br|rb)? # prefix (?:u|r|ur|ru|b|br|rb|f|fr|rf)? # prefix
" # start string " # start string
(?: (?:
| [^"\\] # non-quote or backslash | [^"\\] # non-quote or backslash

View File

@ -200,14 +200,22 @@ def t_empty_list(state, p):
@pg.production("string : STRING") @pg.production("string : STRING")
@set_boundaries @set_boundaries
def t_string(state, p): def t_string(state, p):
s = p[0].value
# Detect and remove any "f" prefix.
is_format = False
if s.startswith('f') or s.startswith('rf'):
is_format = True
s = s.replace('f', '', 1)
# Replace the single double quotes with triple double quotes to allow # Replace the single double quotes with triple double quotes to allow
# embedded newlines. # embedded newlines.
try: try:
s = eval(p[0].value.replace('"', '"""', 1)[:-1] + '"""') s = eval(s.replace('"', '"""', 1)[:-1] + '"""')
except SyntaxError: except SyntaxError:
raise LexException.from_lexer("Can't convert {} to a HyString".format(p[0].value), raise LexException.from_lexer("Can't convert {} to a HyString".format(p[0].value),
state, p[0]) state, p[0])
return (HyString if isinstance(s, str_type) else HyBytes)(s) return (HyString(s, is_format = is_format)
if isinstance(s, str_type)
else HyBytes(s))
@pg.production("string : PARTIAL_STRING") @pg.production("string : PARTIAL_STRING")
@ -222,7 +230,10 @@ bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING')
def t_bracket_string(state, p): def t_bracket_string(state, p):
m = bracket_string_re.match(p[0].value) m = bracket_string_re.match(p[0].value)
delim, content = m.groups() delim, content = m.groups()
return HyString(content, brackets=delim) return HyString(
content,
is_format = delim == 'f' or delim.startswith('f-'),
brackets = delim)
@pg.production("identifier : IDENTIFIER") @pg.production("identifier : IDENTIFIER")

View File

@ -94,8 +94,9 @@ class HyString(HyObject, str_type):
scripts. It's either a ``str`` or a ``unicode``, depending on the scripts. It's either a ``str`` or a ``unicode``, depending on the
Python version. Python version.
""" """
def __new__(cls, s=None, brackets=None): def __new__(cls, s=None, is_format=False, brackets=None):
value = super(HyString, cls).__new__(cls, s) value = super(HyString, cls).__new__(cls, s)
value.is_format = bool(is_format)
value.brackets = brackets value.brackets = brackets
return value return value

View File

@ -10,7 +10,7 @@ from hy.compiler import hy_compile, hy_eval
from hy.errors import HyCompileError, HyLanguageError, HyError from hy.errors import HyCompileError, HyLanguageError, HyError
from hy.lex import hy_parse from hy.lex import hy_parse
from hy.lex.exceptions import LexException, PrematureEndOfInput from hy.lex.exceptions import LexException, PrematureEndOfInput
from hy._compat import PY3 from hy._compat import PY3, PY36
import ast import ast
import pytest import pytest
@ -511,6 +511,18 @@ def test_ast_unicode_vs_bytes():
assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160)) assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
@pytest.mark.skipif(not PY36, reason='f-strings require Python 3.6+')
def test_format_string():
    """Check which format-string sources compile and which are rejected."""
    # Well-formed: no fields, a field in the middle, a field at the end,
    # and fields whose form is itself a (possibly escaped) string literal.
    accepted = (
        'f"hello world"',
        'f"hello {(+ 1 1)} world"',
        'f"hello world {(+ 1 1)}"',
        r'f"hello {\"n\"} world"',
        r'f"hello {\"\\n\"} world"',
    )
    # Malformed: no closing brace, no opening brace, and an unclosed
    # parenthesis inside the field.
    rejected = (
        'f"hello {(+ 1 1) world"',
        'f"hello (+ 1 1)} world"',
        'f"hello {(+ 1 1} world"',
    )
    for source in accepted:
        assert can_compile(source)
    for source in rejected:
        assert cant_compile(source)
def test_ast_bracket_string(): def test_ast_bracket_string():
assert s(r'#[[empty delims]]') == 'empty delims' assert s(r'#[[empty delims]]') == 'empty delims'
assert s(r'#[my delim[fizzle]my delim]') == 'fizzle' assert s(r'#[my delim[fizzle]my delim]') == 'fizzle'

View File

@ -1217,6 +1217,72 @@
(assert (none? (. '"squid" brackets)))) (assert (none? (. '"squid" brackets))))
(defn test-format-strings []
  "NATIVE: test format strings (f-strings), including bracket-string forms."
  (assert (= f"hello world" "hello world"))
  (assert (= f"hello {(+ 1 1)} world" "hello 2 world"))
  (assert (= f"a{ (.upper (+ \"g\" \"k\")) }z" "aGKz"))

  ; Referring to a variable
  (setv p "xyzzy")
  (assert (= f"h{p}j" "hxyzzyj"))

  ; Including a statement and setting a variable
  (assert (= f"a{(do (setv floop 4) (* floop 2))}z" "a8z"))
  (assert (= floop 4))

  ; Comments
  (assert (= f"a{(+ 1
     2 ; This is a comment.
     3)}z" "a6z"))

  ; Newlines in replacement fields
  ; (the continuation line must start in column 0: it is inside a string)
  (assert (= f"ey {\"bee
cee\"} dee" "ey bee\ncee dee"))

  ; Conversion characters and format specifiers
  ; `p:9` and `!r` are deliberately odd variable names: they check that the
  ; first form in a field is read as Hy code before any `!` or `:` suffix.
  (setv p:9 "other")
  (setv !r "bar")
  (defn u [s]
    ; Add a "u" prefix for Python 2. The repr grows by one character there,
    ; so one padding space (two spaces -> one) is removed to compensate.
    (if PY3
        s
        (.replace (.replace s "'" "u'" 1) "  " " " 1)))
  (assert (= f"a{p !r}" (u "a'xyzzy'")))
  ; Width 9: "xyzzy" is padded with 4 spaces, "'xyzzy'" with 2.
  (assert (= f"a{p :9}" "axyzzy    "))
  (assert (= f"a{p:9}" "aother"))
  (assert (= f"a{p !r :9}" (u "a'xyzzy'  ")))
  (assert (= f"a{p !r:9}" (u "a'xyzzy'  ")))
  (assert (= f"a{p:9 :9}" "aother    "))
  (assert (= f"a{!r}" "abar"))
  (assert (= f"a{!r !r}" (u "a'bar'")))

  ; Fun with `r`
  (assert (= f"hello {r\"\\n\"}" r"hello \n"))
  ; In the next case the `r` applies too late to avoid interpreting
  ; the backslash.
  (assert (= f"hello {r\"\n\"}" "hello \n"))

  ; Braces escaped via doubling
  (assert (= f"ab{{cde" "ab{cde"))
  (assert (= f"ab{{cde}}}}fg{{{{{{" "ab{cde}}fg{{{"))
  (assert (= f"ab{{{(+ 1 1)}}}" "ab{2}"))

  ; Nested replacement fields
  ; Width 4 right-aligns the number: three spaces, then "2".
  (assert (= f"{2 :{(+ 2 2)}}" "   2"))
  (setv value 12.34  width 10  precision 4)
  ; "12.34" right-aligned in 10 columns: five padding spaces after "result: ".
  (assert (= f"result: {value :{width}.{precision}}" "result:      12.34"))

  ; Nested replacement fields with ! and :
  (defclass C [object]
    (defn __format__ [self format-spec]
      (+ "C[" format-spec "]")))
  (assert (= f"{(C) : {(str (+ 1 1)) !r :x<5}}" "C[ '2'xx]"))

  ; Format bracket strings
  (assert (= #[f[a{p !r :9}]f] (u "a'xyzzy'  ")))
  (assert (= #[f-string[result: {value :{width}.{precision}}]f-string]
             "result:      12.34")))
(defn test-import-syntax [] (defn test-import-syntax []
"NATIVE: test the import syntax." "NATIVE: test the import syntax."