From eb23ddc1e2abfc107bbfb23f3db705a0b47993f3 Mon Sep 17 00:00:00 2001
From: Kodi Arfer
Date: Fri, 8 Sep 2017 11:22:31 -0700
Subject: [PATCH] =?UTF-8?q?Add=20#[DELIM[=20=E2=80=A6=20]DELIM]=20syntax?=
 =?UTF-8?q?=20for=20string=20literals?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 hy/lex/lexer.py             |  6 ++++++
 hy/lex/parser.py            | 11 +++++++++++
 tests/compilers/test_ast.py | 36 ++++++++++++++++++++++++++++++------
 tests/test_lex.py           | 11 +++++++++++
 4 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/hy/lex/lexer.py b/hy/lex/lexer.py
index cb021a0..b3d1f93 100755
--- a/hy/lex/lexer.py
+++ b/hy/lex/lexer.py
@@ -27,6 +27,12 @@ lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
 lg.add('UNQUOTE', r'~%s' % end_quote)
 lg.add('DISCARD', r'#_')
 lg.add('HASHSTARS', r'#\*+')
+lg.add('BRACKETSTRING', r'''(?x)
+    \# \[ ( [^\[\]]* ) \[    # Opening delimiter
+    \n?                      # A single leading newline will be ignored
+    ((?:\n|.)*?)             # Content of the string
+    \] \1 \]                 # Closing delimiter
+    ''')
 lg.add('HASHOTHER', r'#%s' % identifier)
 
 # A regexp which matches incomplete strings, used to support
diff --git a/hy/lex/parser.py b/hy/lex/parser.py
index 4933a92..a289ad7 100755
--- a/hy/lex/parser.py
+++ b/hy/lex/parser.py
@@ -281,6 +281,17 @@ def t_partial_string(p):
     raise PrematureEndOfInput("Premature end of input")
 
 
+bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING')
+@pg.production("string : BRACKETSTRING")
+@set_boundaries
+def t_bracket_string(p):
+    m = bracket_string_re.match(p[0].value)
+    delim, content = m.groups()
+    s = HyString(content)
+    s.brackets = delim
+    return s
+
+
 @pg.production("identifier : IDENTIFIER")
 @set_boundaries
 def t_identifier(p):
diff --git a/tests/compilers/test_ast.py b/tests/compilers/test_ast.py
index 4e0e00b..b6a5a42 100644
--- a/tests/compilers/test_ast.py
+++ b/tests/compilers/test_ast.py
@@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
 # Copyright 2017 the authors.
 # This file is part of Hy, which is free software licensed under the Expat
 # license. See the LICENSE.
@@ -46,6 +47,10 @@ def cant_compile(expr):
         return e
 
 
+def s(x):
+    return can_compile(x).body[0].value.s
+
+
 def test_ast_bad_type():
     "Make sure AST breakage can happen"
     class C:
@@ -480,12 +485,31 @@ def test_ast_unicode_strings():
 
 
 def test_ast_unicode_vs_bytes():
-    def f(x): return can_compile(x).body[0].value.s
-    assert f('"hello"') == u"hello"
-    assert type(f('"hello"')) is (str if PY3 else unicode)  # noqa
-    assert f('b"hello"') == (eval('b"hello"') if PY3 else "hello")
-    assert type(f('b"hello"')) == (bytes if PY3 else str)
-    assert f('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
+    assert s('"hello"') == u"hello"
+    assert type(s('"hello"')) is (str if PY3 else unicode)  # noqa
+    assert s('b"hello"') == (eval('b"hello"') if PY3 else "hello")
+    assert type(s('b"hello"')) is (bytes if PY3 else str)
+    assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
+
+
+def test_ast_bracket_string():
+    assert s(r'#[[empty delims]]') == 'empty delims'
+    assert s(r'#[my delim[fizzle]my delim]') == 'fizzle'
+    assert s(r'#[[]]') == ''
+    assert s(r'#[my delim[]my delim]') == ''
+    assert type(s('#[X[hello]X]')) is (str if PY3 else unicode)  # noqa
+    assert s(r'#[X[raw\nstring]X]') == 'raw\\nstring'
+    assert s(r'#[foozle[aa foozli bb ]foozle]') == 'aa foozli bb '
+    assert s(r'#[([unbalanced](]') == 'unbalanced'
+    assert s(r'#[(1💯@)} {a![hello world](1💯@)} {a!]') == 'hello world'
+    assert (s(r'''#[X[
+Remove the leading newline, please.
+]X]''') == 'Remove the leading newline, please.\n')
+    assert (s(r'''#[X[
+
+
+Only one leading newline should be removed.
+]X]''') == '\n\nOnly one leading newline should be removed.\n')
 
 
 def test_compile_error():
diff --git a/tests/test_lex.py b/tests/test_lex.py
index 247ae4d..5a21e7f 100644
--- a/tests/test_lex.py
+++ b/tests/test_lex.py
@@ -69,6 +69,17 @@ bc"
     assert objs == [HyString("abc")]
 
 
+def test_lex_bracket_strings():
+
+    objs = tokenize("#[my delim[hello world]my delim]")
+    assert objs == [HyString("hello world")]
+    assert objs[0].brackets == "my delim"
+
+    objs = tokenize("#[[squid]]")
+    assert objs == [HyString("squid")]
+    assert objs[0].brackets == ""
+
+
 def test_lex_integers():
     """ Make sure that integers are valid expressions"""
     objs = tokenize("42 ")
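
Usage sketch (an editor's addition, not part of the commit): with the patch
applied, a bracket string lexes to a single HyString whose delimiter is kept
on a `brackets` attribute and whose content is taken raw, with no escape
processing; a single newline right after the opening delimiter is dropped.
The snippet goes through `hy.lex.tokenize`, the same entry point the test
suite imports; the delimiter word `end` is just an arbitrary example.

    from hy.lex import tokenize

    # The text between #[end[ ... ]end] is captured verbatim by the
    # BRACKETSTRING rule: backslashes and double quotes need no escaping.
    [s] = tokenize(r'#[end[no \n escapes, no "quote" trouble]end]')
    print(s)           # no \n escapes, no "quote" trouble
    print(s.brackets)  # end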