From 5bfc140b4d52969b60b4847159417c8c279a23cf Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Tue, 26 Feb 2019 14:04:24 -0500 Subject: [PATCH] Implement format strings --- NEWS.rst | 8 +++ hy/compiler.py | 115 +++++++++++++++++++++++++++++++-- hy/lex/lexer.py | 2 +- hy/lex/parser.py | 17 ++++- hy/models.py | 3 +- tests/compilers/test_ast.py | 14 +++- tests/native_tests/language.hy | 66 +++++++++++++++++++ 7 files changed, 215 insertions(+), 10 deletions(-) diff --git a/NEWS.rst b/NEWS.rst index 7c66102..cccb8eb 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -1,5 +1,13 @@ .. default-role:: code +Unreleased +============================== + +New Features +------------------------------ +* Format strings with embedded Hy code (e.g., `f"The sum is {(+ x y)}"`) + are now supported, even on Pythons earlier than 3.6. + 0.16.0 ============================== diff --git a/hy/compiler.py b/hy/compiler.py index 08e0c98..8bd2aae 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -12,14 +12,15 @@ from funcparserlib.parser import some, many, oneplus, maybe, NoParseError from hy.errors import (HyCompileError, HyTypeError, HyLanguageError, HySyntaxError, HyEvalError, HyInternalError) -from hy.lex import mangle, unmangle +from hy.lex import mangle, unmangle, hy_parse, parse_one_thing, LexException from hy._compat import (string_types, str_type, bytes_type, long_type, PY3, - PY35, reraise) + PY35, PY36, reraise) from hy.macros import require, load_macros, macroexpand, tag_macroexpand import hy.core +import re import pkgutil import traceback import importlib @@ -31,6 +32,7 @@ import copy import __future__ from collections import defaultdict +from functools import reduce if PY3: import builtins @@ -629,8 +631,11 @@ class HyASTCompiler(object): elif isinstance(form, HyKeyword): body = [HyString(form.name)] - elif isinstance(form, HyString) and form.brackets is not None: - body.extend([HyKeyword("brackets"), form.brackets]) + elif isinstance(form, HyString): + if form.is_format: + 
+                body.extend([HyKeyword("is_format"), form.is_format])
+            if form.brackets is not None:
+                body.extend([HyKeyword("brackets"), form.brackets])
 
         ret = HyExpression([HySymbol(name)] + body).replace(form)
         return imports, ret, False
@@ -1798,10 +1803,138 @@ class HyASTCompiler(object):
 
     @builds_model(HyString, HyBytes)
     def compile_string(self, string):
+        if type(string) is HyString and string.is_format:
+            # This is a format string (a.k.a. an f-string).
+            return self._format_string(string, str_type(string))
         node = asty.Bytes if PY3 and type(string) is HyBytes else asty.Str
         f = bytes_type if type(string) is HyBytes else str_type
         return node(string, s=f(string))
 
+    def _format_string(self, string, rest, allow_recursion=True):
+        """Compile the text of a format string into a `Result`.
+
+        `string` is the model being compiled; it supplies the source
+        position for each generated node and for any syntax error.
+        `rest` is the text that is still to be processed.
+        `allow_recursion` says whether replacement fields may contain
+        nested replacement fields; it is false while a format
+        specifier is being compiled, so nesting stops at one level.
+
+        On Python 3.6+ the result's expression is a `JoinedStr`. On
+        older Pythons it is built from string literals and
+        `str.format` calls joined with `+`.
+        """
+        values = []
+        ret = Result()
+
+        while True:
+            # Look for the next replacement field, and get the
+            # plain text before it.
+            match = re.search(r'\{\{?|\}\}?', rest)
+            if match:
+                literal_chars = rest[: match.start()]
+                if match.group() == '}':
+                    raise self._syntax_error(string,
+                        "f-string: single '}' is not allowed")
+                if match.group() in ('{{', '}}'):
+                    # Doubled braces just add a single brace to the text.
+                    literal_chars += match.group()[0]
+                rest = rest[match.end() :]
+            else:
+                literal_chars = rest
+                rest = ""
+            if literal_chars:
+                values.append(asty.Str(string, s = literal_chars))
+            if match is None:
+                # No braces remain, so all of the text has been used.
+                break
+            if match.group() != '{':
+                continue
+
+            # Look for the end of the replacement field, allowing
+            # one more level of matched braces, but no deeper, and only
+            # if we can recurse. A "{" that ends the text matches nothing
+            # here, so it's an error rather than being silently dropped.
+            match = re.match(
+                r'(?: \{ [^{}]* \} | [^{}]+ )* \}'
+                    if allow_recursion
+                    else r'[^{}]* \}',
+                rest, re.VERBOSE)
+            if not match:
+                raise self._syntax_error(string, 'f-string: mismatched braces')
+            item = rest[: match.end() - 1]
+            rest = rest[match.end() :]
+
+            # Parse the first form.
+            try:
+                model, item = parse_one_thing(item)
+            except (ValueError, LexException) as e:
+                raise self._syntax_error(string, "f-string: " + str_type(e))
+
+            # Look for a conversion character.
+            item = item.lstrip()
+            conversion = None
+            if item.startswith('!'):
+                # Slice instead of indexing, so that a field ending in a
+                # bare "!" is a syntax error and not an IndexError.
+                conversion = item[1:2]
+                if conversion not in ('s', 'r', 'a'):
+                    raise self._syntax_error(string,
+                        "f-string: invalid conversion character: "
+                        "expected 's', 'r', or 'a'")
+                item = item[2:].lstrip()
+
+            # Look for a format specifier.
+            format_spec = asty.Str(string, s = "")
+            if item.startswith(':'):
+                if allow_recursion:
+                    ret += self._format_string(string,
+                        item[1:],
+                        allow_recursion=False)
+                    format_spec = ret.force_expr
+                else:
+                    format_spec = asty.Str(string, s=item[1:])
+            elif item:
+                raise self._syntax_error(string,
+                    "f-string: trailing junk in field")
+
+            # Now, having finished compiling any recursively included
+            # forms, we can compile the first form that we parsed.
+            ret += self.compile(model)
+
+            if PY36:
+                values.append(asty.FormattedValue(
+                    string,
+                    conversion = -1 if conversion is None else ord(conversion),
+                    format_spec = format_spec,
+                    value = ret.force_expr))
+            else:
+                # Make an expression like:
+                #    "{!r:{}}".format(value, format_spec)
+                values.append(asty.Call(string,
+                    func = asty.Attribute(
+                        string,
+                        value = asty.Str(string, s =
+                            '{' +
+                            ('!' + conversion if conversion else '') +
+                            ':{}}'),
+                        attr = 'format', ctx = ast.Load()),
+                    args = [ret.force_expr, format_spec],
+                    keywords = [], starargs = None, kwargs = None))
+
+        if PY36:
+            joined = asty.JoinedStr(string, values = values)
+        elif values:
+            joined = reduce(
+                lambda x, y:
+                    asty.BinOp(string, left = x, op = ast.Add(), right = y),
+                values)
+        else:
+            # `reduce` can't fold an empty list, which is what an empty
+            # format string or format specifier leaves us with.
+            joined = asty.Str(string, s = "")
+        return ret + joined
+
     @builds_model(HyList, HySet)
     def compile_list(self, expression):
         elts, ret, _ = self._compile_collect(expression)
diff --git a/hy/lex/lexer.py b/hy/lex/lexer.py
index 14b7c88..f202d94 100755
--- a/hy/lex/lexer.py
+++ b/hy/lex/lexer.py
@@ -38,7 +38,7 @@ lg.add('HASHOTHER', r'#%s' % identifier)
 # A regexp which matches incomplete strings, used to support
 # multi-line strings in the interpreter
 partial_string = r'''(?x)
-    (?:u|r|ur|ru|b|br|rb)?
# prefix + (?:u|r|ur|ru|b|br|rb|f|fr|rf)? # prefix " # start string (?: | [^"\\] # non-quote or backslash diff --git a/hy/lex/parser.py b/hy/lex/parser.py index 27180c5..6a1acfb 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -200,14 +200,22 @@ def t_empty_list(state, p): @pg.production("string : STRING") @set_boundaries def t_string(state, p): + s = p[0].value + # Detect and remove any "f" prefix. + is_format = False + if s.startswith('f') or s.startswith('rf'): + is_format = True + s = s.replace('f', '', 1) # Replace the single double quotes with triple double quotes to allow # embedded newlines. try: - s = eval(p[0].value.replace('"', '"""', 1)[:-1] + '"""') + s = eval(s.replace('"', '"""', 1)[:-1] + '"""') except SyntaxError: raise LexException.from_lexer("Can't convert {} to a HyString".format(p[0].value), state, p[0]) - return (HyString if isinstance(s, str_type) else HyBytes)(s) + return (HyString(s, is_format = is_format) + if isinstance(s, str_type) + else HyBytes(s)) @pg.production("string : PARTIAL_STRING") @@ -222,7 +230,10 @@ bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING') def t_bracket_string(state, p): m = bracket_string_re.match(p[0].value) delim, content = m.groups() - return HyString(content, brackets=delim) + return HyString( + content, + is_format = delim == 'f' or delim.startswith('f-'), + brackets = delim) @pg.production("identifier : IDENTIFIER") diff --git a/hy/models.py b/hy/models.py index ef51a26..458d615 100644 --- a/hy/models.py +++ b/hy/models.py @@ -94,8 +94,9 @@ class HyString(HyObject, str_type): scripts. It's either a ``str`` or a ``unicode``, depending on the Python version. 
""" - def __new__(cls, s=None, brackets=None): + def __new__(cls, s=None, is_format=False, brackets=None): value = super(HyString, cls).__new__(cls, s) + value.is_format = bool(is_format) value.brackets = brackets return value diff --git a/tests/compilers/test_ast.py b/tests/compilers/test_ast.py index 9311eef..9d004da 100644 --- a/tests/compilers/test_ast.py +++ b/tests/compilers/test_ast.py @@ -10,7 +10,7 @@ from hy.compiler import hy_compile, hy_eval from hy.errors import HyCompileError, HyLanguageError, HyError from hy.lex import hy_parse from hy.lex.exceptions import LexException, PrematureEndOfInput -from hy._compat import PY3 +from hy._compat import PY3, PY36 import ast import pytest @@ -511,6 +511,18 @@ def test_ast_unicode_vs_bytes(): assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160)) +@pytest.mark.skipif(not PY36, reason='f-strings require Python 3.6+') +def test_format_string(): + assert can_compile('f"hello world"') + assert can_compile('f"hello {(+ 1 1)} world"') + assert can_compile('f"hello world {(+ 1 1)}"') + assert cant_compile('f"hello {(+ 1 1) world"') + assert cant_compile('f"hello (+ 1 1)} world"') + assert cant_compile('f"hello {(+ 1 1} world"') + assert can_compile(r'f"hello {\"n\"} world"') + assert can_compile(r'f"hello {\"\\n\"} world"') + + def test_ast_bracket_string(): assert s(r'#[[empty delims]]') == 'empty delims' assert s(r'#[my delim[fizzle]my delim]') == 'fizzle' diff --git a/tests/native_tests/language.hy b/tests/native_tests/language.hy index 65c629a..04936dd 100644 --- a/tests/native_tests/language.hy +++ b/tests/native_tests/language.hy @@ -1217,6 +1217,72 @@ (assert (none? (. 
'"squid" brackets)))) +(defn test-format-strings [] + (assert (= f"hello world" "hello world")) + (assert (= f"hello {(+ 1 1)} world" "hello 2 world")) + (assert (= f"a{ (.upper (+ \"g\" \"k\")) }z" "aGKz")) + + ; Referring to a variable + (setv p "xyzzy") + (assert (= f"h{p}j" "hxyzzyj")) + + ; Including a statement and setting a variable + (assert (= f"a{(do (setv floop 4) (* floop 2))}z" "a8z")) + (assert (= floop 4)) + + ; Comments + (assert (= f"a{(+ 1 + 2 ; This is a comment. + 3)}z" "a6z")) + + ; Newlines in replacement fields + (assert (= f"ey {\"bee +cee\"} dee" "ey bee\ncee dee")) + + ; Conversion characters and format specifiers + (setv p:9 "other") + (setv !r "bar") + (defn u [s] + ; Add a "u" prefix for Python 2. + (if PY3 + s + (.replace (.replace s "'" "u'" 1) " " " " 1))) + (assert (= f"a{p !r}" (u "a'xyzzy'"))) + (assert (= f"a{p :9}" "axyzzy ")) + (assert (= f"a{p:9}" "aother")) + (assert (= f"a{p !r :9}" (u "a'xyzzy' "))) + (assert (= f"a{p !r:9}" (u "a'xyzzy' "))) + (assert (= f"a{p:9 :9}" "aother ")) + (assert (= f"a{!r}" "abar")) + (assert (= f"a{!r !r}" (u "a'bar'"))) + + ; Fun with `r` + (assert (= f"hello {r\"\\n\"}" r"hello \n")) + (assert (= f"hello {r\"\n\"}" "hello \n")) + ; The `r` applies too late to avoid interpreting a backslash. + + ; Braces escaped via doubling + (assert (= f"ab{{cde" "ab{cde")) + (assert (= f"ab{{cde}}}}fg{{{{{{" "ab{cde}}fg{{{")) + (assert (= f"ab{{{(+ 1 1)}}}" "ab{2}")) + + ; Nested replacement fields + (assert (= f"{2 :{(+ 2 2)}}" " 2")) + (setv value 12.34 width 10 precision 4) + (assert (= f"result: {value :{width}.{precision}}" "result: 12.34")) + + ; Nested replacement fields with ! 
and : + (defclass C [object] + (defn __format__ [self format-spec] + (+ "C[" format-spec "]"))) + (assert (= f"{(C) : {(str (+ 1 1)) !r :x<5}}" "C[ '2'xx]")) + + ; Format bracket strings + (assert (= #[f[a{p !r :9}]f] (u "a'xyzzy' "))) + (assert (= #[f-string[result: {value :{width}.{precision}}]f-string] + "result: 12.34"))) + + (defn test-import-syntax [] "NATIVE: test the import syntax."