diff --git a/NEWS b/NEWS index 66bb80d..a816f24 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ Changes from 0.13.0 longer names * Periods are no longer allowed in keywords * `eval` is now a function instead of a special form + * Added a form of string literal called "bracket strings" delimited by + `#[FOO[` and `]FOO]`, where `FOO` is customizable * The compiler now automatically promotes values to Hy model objects as necessary, so you can write ``(eval `(+ 1 ~n))`` instead of ``(eval `(+ 1 ~(HyInteger n)))`` diff --git a/docs/language/api.rst b/docs/language/api.rst index b1a3e1d..ed789eb 100644 --- a/docs/language/api.rst +++ b/docs/language/api.rst @@ -52,26 +52,47 @@ digits. (print 10,000,000,000 10_000_000_000) -Unlike Python, Hy provides literal forms for NaN and infinity: `NaN`, `Inf`, -and `-Inf`. +Unlike Python, Hy provides literal forms for NaN and infinity: ``NaN``, +``Inf``, and ``-Inf``. string literals --------------- -Unlike Python, Hy allows only double-quoted strings (e.g., ``"hello"``). The -single-quote character ``'`` is reserved for preventing the evaluation of a -form (e.g., ``'(+ 1 1)``), as in most Lisps. +Hy allows double-quoted strings (e.g., ``"hello"``) but, unlike Python, not +single-quoted strings. The single-quote character ``'`` is reserved for +preventing the evaluation of a form (e.g., ``'(+ 1 1)``), as in most Lisps. Python's so-called triple-quoted strings (e.g., ``'''hello'''`` and ``"""hello"""``) aren't supported. However, in Hy, unlike Python, any string -literal can contain newlines. +literal can contain newlines. Furthermore, Hy supports an alternative form of +string literal called a "bracket string" similar to Lua's long brackets. +Bracket strings have customizable delimiters, like the here-documents of other +languages. A bracket string begins with ``#[FOO[`` and ends with ``]FOO]``, +where ``FOO`` is any string not containing ``[`` or ``]``, including the empty +string. 
For example:: -Whether running under Python 2 or Python 3, Hy treats string literals as -sequences of Unicode characters by default, and allows you to prefix a literal -with ``b`` to treat it as a sequence of bytes. So when running under Python 3, -Hy translates ``"foo"`` and ``b"foo"`` to the identical Python code, but when -running under Python 2, ``"foo"`` is translated to ``u"foo"`` and ``b"foo"`` is -translated to ``"foo"``. + => (print #[["That's very kind of yuo [sic]" Tom wrote back.]]) + "That's very kind of yuo [sic]" Tom wrote back. + => (print #[==[1 + 1 = 2]==]) + 1 + 1 = 2 + +A bracket string can contain newlines, but if it begins with one, the newline +is removed, so you can begin the content of a bracket string on the line +following the opening delimiter with no effect on the content. Any leading +newlines past the first are preserved. + +Plain string literals support :ref:`a variety of backslash escapes +<py:strings>`. To create a "raw string" that interprets all backslashes +literally, prefix the string with ``r``, as in ``r"slash\not"``. Bracket +strings are always raw strings and don't allow the ``r`` prefix. + +Whether running under Python 2 or Python 3, Hy treats all string literals as +sequences of Unicode characters by default, and allows you to prefix a plain +string literal (but not a bracket string) with ``b`` to treat it as a sequence +of bytes. So when running under Python 3, Hy translates ``"foo"`` and +``b"foo"`` to the identical Python code, but when running under Python 2, +``"foo"`` is translated to ``u"foo"`` and ``b"foo"`` is translated to +``"foo"``. .. _syntax-keywords: diff --git a/docs/language/internals.rst b/docs/language/internals.rst index bf2f4f0..ffd3844 100644 --- a/docs/language/internals.rst +++ b/docs/language/internals.rst @@ -102,7 +102,7 @@ HyString ~~~~~~~~ ``hy.models.HyString`` is the base class of string-equivalent Hy -models. It also represents double-quoted string literals, ``""``, which +models. 
It also represents string literals (including bracket strings), which compile down to unicode string literals in Python. ``HyStrings`` inherit unicode objects in Python 2, and string objects in Python 3 (and are therefore not encoding-dependent). @@ -113,6 +113,12 @@ Hy literal strings can span multiple lines, and are considered by the parser as a single unit, respecting the Python escapes for unicode strings. +``HyString``\s have an attribute ``brackets`` that stores the custom +delimiter used for a bracket string (e.g., ``"=="`` for ``#[==[hello +world]==]`` and the empty string for ``#[[hello world]]``). +``HyString``\s that are not produced by bracket strings have their +``brackets`` set to ``None``. + HyBytes ~~~~~~~ diff --git a/hy/compiler.py b/hy/compiler.py index 34dc285..8ddd993 100755 --- a/hy/compiler.py +++ b/hy/compiler.py @@ -745,6 +745,12 @@ class HyASTCompiler(object): return imports, HyExpression([HySymbol(name), HyString(form)]).replace(form), False + elif isinstance(form, HyString): + x = [HySymbol(name), form] + if form.brackets is not None: + x.extend([HyKeyword(":brackets"), form.brackets]) + return imports, HyExpression(x).replace(form), False + return imports, HyExpression([HySymbol(name), form]).replace(form), False diff --git a/hy/lex/lexer.py b/hy/lex/lexer.py index cb021a0..b3d1f93 100755 --- a/hy/lex/lexer.py +++ b/hy/lex/lexer.py @@ -27,6 +27,12 @@ lg.add('UNQUOTESPLICE', r'~@%s' % end_quote) lg.add('UNQUOTE', r'~%s' % end_quote) lg.add('DISCARD', r'#_') lg.add('HASHSTARS', r'#\*+') +lg.add('BRACKETSTRING', r'''(?x) + \# \[ ( [^\[\]]* ) \[ # Opening delimiter + \n? # A single leading newline will be ignored + ((?:\n|.)*?) 
# Content of the string + \] \1 \] # Closing delimiter + ''') lg.add('HASHOTHER', r'#%s' % identifier) # A regexp which matches incomplete strings, used to support diff --git a/hy/lex/parser.py b/hy/lex/parser.py index 4933a92..05fda85 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -281,6 +281,15 @@ def t_partial_string(p): raise PrematureEndOfInput("Premature end of input") +bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING') +@pg.production("string : BRACKETSTRING") +@set_boundaries +def t_bracket_string(p): + m = bracket_string_re.match(p[0].value) + delim, content = m.groups() + return HyString(content, brackets=delim) + + @pg.production("identifier : IDENTIFIER") @set_boundaries def t_identifier(p): diff --git a/hy/models.py b/hy/models.py index 574db49..0d6df4c 100644 --- a/hy/models.py +++ b/hy/models.py @@ -65,7 +65,10 @@ class HyString(HyObject, str_type): scripts. It's either a ``str`` or a ``unicode``, depending on the Python version. """ - pass + def __new__(cls, s=None, brackets=None): + value = super(HyString, cls).__new__(cls, s) + value.brackets = brackets + return value _wrappers[str_type] = HyString diff --git a/tests/compilers/test_ast.py b/tests/compilers/test_ast.py index 9dccfd9..0d62e16 100644 --- a/tests/compilers/test_ast.py +++ b/tests/compilers/test_ast.py @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- # Copyright 2017 the authors. # This file is part of Hy, which is free software licensed under the Expat # license. See the LICENSE. 
@@ -46,6 +47,10 @@ def cant_compile(expr): return e +def s(x): + return can_compile(x).body[0].value.s + + def test_ast_bad_type(): "Make sure AST breakage can happen" class C: @@ -481,12 +486,31 @@ def test_ast_unicode_strings(): def test_ast_unicode_vs_bytes(): - def f(x): return can_compile(x).body[0].value.s - assert f('"hello"') == u"hello" - assert type(f('"hello"')) is (str if PY3 else unicode) # noqa - assert f('b"hello"') == (eval('b"hello"') if PY3 else "hello") - assert type(f('b"hello"')) == (bytes if PY3 else str) - assert f('b"\\xa0"') == (bytes([160]) if PY3 else chr(160)) + assert s('"hello"') == u"hello" + assert type(s('"hello"')) is (str if PY3 else unicode) # noqa + assert s('b"hello"') == (eval('b"hello"') if PY3 else "hello") + assert type(s('b"hello"')) is (bytes if PY3 else str) + assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160)) + + +def test_ast_bracket_string(): + assert s(r'#[[empty delims]]') == 'empty delims' + assert s(r'#[my delim[fizzle]my delim]') == 'fizzle' + assert s(r'#[[]]') == '' + assert s(r'#[my delim[]my delim]') == '' + assert type(s('#[X[hello]X]')) is (str if PY3 else unicode) # noqa + assert s(r'#[X[raw\nstring]X]') == 'raw\\nstring' + assert s(r'#[foozle[aa foozli bb ]foozle]') == 'aa foozli bb ' + assert s(r'#[([unbalanced](]') == 'unbalanced' + assert s(r'#[(1💯@)} {a![hello world](1💯@)} {a!]') == 'hello world' + assert (s(r'''#[X[ +Remove the leading newline, please. +]X]''') == 'Remove the leading newline, please.\n') + assert (s(r'''#[X[ + + +Only one leading newline should be removed. +]X]''') == '\n\nOnly one leading newline should be removed.\n') def test_compile_error(): diff --git a/tests/native_tests/language.hy b/tests/native_tests/language.hy index c221d03..1a2ab0f 100644 --- a/tests/native_tests/language.hy +++ b/tests/native_tests/language.hy @@ -1230,6 +1230,12 @@ (assert (= (eval `(get ~d ~k)) 2))) +(defn test-quote-bracket-string-delim [] + (assert (= (. 
'#[my delim[hello world]my delim] brackets) "my delim")) + (assert (= (. '#[[squid]] brackets) "")) + (assert (none? (. '"squid" brackets)))) + + (defn test-import-syntax [] "NATIVE: test the import syntax." diff --git a/tests/test_lex.py b/tests/test_lex.py index 247ae4d..5a21e7f 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -69,6 +69,17 @@ bc" assert objs == [HyString("abc")] +def test_lex_bracket_strings(): + + objs = tokenize("#[my delim[hello world]my delim]") + assert objs == [HyString("hello world")] + assert objs[0].brackets == "my delim" + + objs = tokenize("#[[squid]]") + assert objs == [HyString("squid")] + assert objs[0].brackets == "" + + def test_lex_integers(): """ Make sure that integers are valid expressions""" objs = tokenize("42 ")