From eb23ddc1e2abfc107bbfb23f3db705a0b47993f3 Mon Sep 17 00:00:00 2001
From: Kodi Arfer
Date: Fri, 8 Sep 2017 11:22:31 -0700
Subject: [PATCH] =?UTF-8?q?Add=20#[DELIM[=20=E2=80=A6=20]DELIM]=20syntax?=
 =?UTF-8?q?=20for=20string=20literals?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 hy/lex/lexer.py             |  6 ++++++
 hy/lex/parser.py            | 11 +++++++++++
 tests/compilers/test_ast.py | 36 ++++++++++++++++++++++++++++++------
 tests/test_lex.py           | 11 +++++++++++
 4 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/hy/lex/lexer.py b/hy/lex/lexer.py
index cb021a0..b3d1f93 100755
--- a/hy/lex/lexer.py
+++ b/hy/lex/lexer.py
@@ -27,6 +27,12 @@ lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
 lg.add('UNQUOTE', r'~%s' % end_quote)
 lg.add('DISCARD', r'#_')
 lg.add('HASHSTARS', r'#\*+')
+lg.add('BRACKETSTRING', r'''(?x)
+    \# \[ ( [^\[\]]* ) \[    # Opening delimiter
+    \n?                      # A single leading newline will be ignored
+    ((?:\n|.)*?)             # Content of the string
+    \] \1 \]                 # Closing delimiter
+    ''')
 lg.add('HASHOTHER', r'#%s' % identifier)
 
 # A regexp which matches incomplete strings, used to support
diff --git a/hy/lex/parser.py b/hy/lex/parser.py
index 4933a92..a289ad7 100755
--- a/hy/lex/parser.py
+++ b/hy/lex/parser.py
@@ -281,6 +281,17 @@ def t_partial_string(p):
     raise PrematureEndOfInput("Premature end of input")
 
 
+bracket_string_re = next(r.re for r in lexer.rules if r.name == 'BRACKETSTRING')
+@pg.production("string : BRACKETSTRING")
+@set_boundaries
+def t_bracket_string(p):
+    m = bracket_string_re.match(p[0].value)
+    delim, content = m.groups()
+    s = HyString(content)
+    s.brackets = delim
+    return s
+
+
 @pg.production("identifier : IDENTIFIER")
 @set_boundaries
 def t_identifier(p):
diff --git a/tests/compilers/test_ast.py b/tests/compilers/test_ast.py
index 4e0e00b..b6a5a42 100644
--- a/tests/compilers/test_ast.py
+++ b/tests/compilers/test_ast.py
@@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
 # Copyright 2017 the authors.
 # This file is part of Hy, which is free software licensed under the Expat
 # license. See the LICENSE.
@@ -46,6 +47,10 @@ def cant_compile(expr):
         return e
 
 
+def s(x):
+    return can_compile(x).body[0].value.s
+
+
 def test_ast_bad_type():
     "Make sure AST breakage can happen"
     class C:
@@ -480,12 +485,31 @@ def test_ast_unicode_strings():
 
 
 def test_ast_unicode_vs_bytes():
-    def f(x): return can_compile(x).body[0].value.s
-    assert f('"hello"') == u"hello"
-    assert type(f('"hello"')) is (str if PY3 else unicode)  # noqa
-    assert f('b"hello"') == (eval('b"hello"') if PY3 else "hello")
-    assert type(f('b"hello"')) == (bytes if PY3 else str)
-    assert f('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
+    assert s('"hello"') == u"hello"
+    assert type(s('"hello"')) is (str if PY3 else unicode)  # noqa
+    assert s('b"hello"') == (eval('b"hello"') if PY3 else "hello")
+    assert type(s('b"hello"')) is (bytes if PY3 else str)
+    assert s('b"\\xa0"') == (bytes([160]) if PY3 else chr(160))
+
+
+def test_ast_bracket_string():
+    assert s(r'#[[empty delims]]') == 'empty delims'
+    assert s(r'#[my delim[fizzle]my delim]') == 'fizzle'
+    assert s(r'#[[]]') == ''
+    assert s(r'#[my delim[]my delim]') == ''
+    assert type(s('#[X[hello]X]')) is (str if PY3 else unicode)  # noqa
+    assert s(r'#[X[raw\nstring]X]') == 'raw\\nstring'
+    assert s(r'#[foozle[aa foozli bb ]foozle]') == 'aa foozli bb '
+    assert s(r'#[([unbalanced](]') == 'unbalanced'
+    assert s(r'#[(1💯@)} {a![hello world](1💯@)} {a!]') == 'hello world'
+    assert (s(r'''#[X[
+Remove the leading newline, please.
+]X]''') == 'Remove the leading newline, please.\n')
+    assert (s(r'''#[X[
+
+
+Only one leading newline should be removed.
+]X]''') == '\n\nOnly one leading newline should be removed.\n')
 
 
 def test_compile_error():
diff --git a/tests/test_lex.py b/tests/test_lex.py
index 247ae4d..5a21e7f 100644
--- a/tests/test_lex.py
+++ b/tests/test_lex.py
@@ -69,6 +69,17 @@ bc"
     assert objs == [HyString("abc")]
 
 
+def test_lex_bracket_strings():
+
+    objs = tokenize("#[my delim[hello world]my delim]")
+    assert objs == [HyString("hello world")]
+    assert objs[0].brackets == "my delim"
+
+    objs = tokenize("#[[squid]]")
+    assert objs == [HyString("squid")]
+    assert objs[0].brackets == ""
+
+
 def test_lex_integers():
     """ Make sure that integers are valid expressions"""
     objs = tokenize("42 ")
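
Usage sketch (an editor's addition, not part of the commit): with the patch
applied, a bracket string lexes to a single HyString whose delimiter is kept
on a `brackets` attribute and whose content is taken raw, with no escape
processing; a single newline right after the opening delimiter is dropped.
The snippet goes through `hy.lex.tokenize`, the same entry point the test
suite imports; the delimiter word `end` is just an arbitrary example.

    from hy.lex import tokenize

    # The text between #[end[ ... ]end] is captured verbatim by the
    # BRACKETSTRING rule: backslashes and double quotes need no escaping.
    [s] = tokenize(r'#[end[no \n escapes, no "quote" trouble]end]')
    print(s)           # no \n escapes, no "quote" trouble
    print(s.brackets)  # end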