# Patch summary: report an unterminated string literal as "needs more input"
# rather than as a generic lex failure.
#
# hy/lex/lexer.py
#   * The verbose-mode pattern that used to be passed inline to
#     lg.add('STRING', ...) is bound to a module-level name, `partial_string`,
#     with its closing-quote line removed.
#   * STRING is re-registered as r'%s"' % partial_string, i.e. the partial
#     pattern followed by the closing double quote.
#   * A new PARTIAL_STRING token is registered with `partial_string` itself.
#
# hy/lex/parser.py
#   * Adds the production "string : PARTIAL_STRING"; its handler,
#     t_partial_string, always raises
#     PrematureEndOfInput("Premature end of input").
#
# tests/lex/test_lex.py
#   * test_lex_exception gains a case: tokenize("(foo \"bar") must raise
#     PrematureEndOfInput.
#   * The docstring of test_lex_expression_strings is corrected from
#     "symbols" to "strings".
#
# NOTE(review): in this copy the patch's line breaks have been collapsed into
#   spaces; they must be restored before `git apply` / `patch` can parse it.
# NOTE(review): STRING is added before PARTIAL_STRING; presumably the lexer
#   prefers the earlier rule, so a terminated string is never tokenized as a
#   partial one -- confirm against the lexer generator's rule ordering.
# NOTE(review): the in-pattern comment "one or more times" annotates `)*`,
#   which is zero-or-more; it is an unchanged context line, so it is left
#   as-is here.
diff --git a/hy/lex/lexer.py b/hy/lex/lexer.py index 8032fc0..5c1ba3b 100644 --- a/hy/lex/lexer.py +++ b/hy/lex/lexer.py @@ -42,8 +42,9 @@ lg.add('UNQUOTE', r'~%s' % end_quote) lg.add('HASHBANG', r'#!.*[^\r\n]') lg.add('HASHREADER', r'#.') - -lg.add('STRING', r'''(?x) +# A regexp which matches incomplete strings, used to support +# multi-line strings in the interpreter +partial_string = r'''(?x) (?:u|r|ur|ru)? # prefix " # start string (?: @@ -53,9 +54,10 @@ lg.add('STRING', r'''(?x) | \\u[0-9a-fA-F]{4} # or unicode escape | \\U[0-9a-fA-F]{8} # or long unicode escape )* # one or more times - " # end string -''') +''' +lg.add('STRING', r'%s"' % partial_string) +lg.add('PARTIAL_STRING', partial_string) lg.add('IDENTIFIER', r'[^()\[\]{}\'"\s;]+') diff --git a/hy/lex/parser.py b/hy/lex/parser.py index 30793f2..037a673 100644 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -236,6 +236,12 @@ def t_string(p): return uni_hystring(s) +@pg.production("string : PARTIAL_STRING") +def t_partial_string(p): + # Any unterminated string requires more input + raise PrematureEndOfInput("Premature end of input") + + @pg.production("identifier : IDENTIFIER") @set_boundaries def t_identifier(p): diff --git a/tests/lex/test_lex.py b/tests/lex/test_lex.py index ebf0404..9808d70 100644 --- a/tests/lex/test_lex.py +++ b/tests/lex/test_lex.py @@ -49,6 +49,11 @@ def test_lex_exception(): assert True is False except PrematureEndOfInput: pass + try: + tokenize("(foo \"bar") + assert True is False + except PrematureEndOfInput: + pass def test_unbalanced_exception(): @@ -73,7 +78,7 @@ def test_lex_expression_symbols(): def test_lex_expression_strings(): - """ Test that expressions can produce symbols """ + """ Test that expressions can produce strings """ objs = tokenize("(foo \"bar\")") assert objs == [HyExpression([HySymbol("foo"), HyString("bar")])]