508 lines
16 KiB
Python
508 lines
16 KiB
Python
# Copyright 2020 the authors.
|
|
# This file is part of Hy, which is free software licensed under the Expat
|
|
# license. See the LICENSE.
|
|
import sys
|
|
import traceback
|
|
|
|
import pytest
|
|
|
|
from math import isnan
|
|
from hy.models import (HyExpression, HyInteger, HyFloat, HyComplex, HySymbol,
|
|
HyString, HyDict, HyList, HySet, HyKeyword)
|
|
from hy.lex import tokenize
|
|
from hy.lex.exceptions import LexException, PrematureEndOfInput
|
|
from hy.errors import hy_exc_handler
|
|
|
|
def peoi(): return pytest.raises(PrematureEndOfInput)
|
|
def lexe(): return pytest.raises(LexException)
|
|
|
|
|
|
def check_ex(execinfo, expected):
|
|
output = traceback.format_exception_only(execinfo.type, execinfo.value)
|
|
assert output[:-1] == expected[:-1]
|
|
# Python 2.7 doesn't give the full exception name, so we compensate.
|
|
assert output[-1].endswith(expected[-1])
|
|
|
|
|
|
def check_trace_output(capsys, execinfo, expected):
|
|
sys.__excepthook__(execinfo.type, execinfo.value, execinfo.tb)
|
|
captured_wo_filtering = capsys.readouterr()[-1].strip('\n')
|
|
|
|
hy_exc_handler(execinfo.type, execinfo.value, execinfo.tb)
|
|
captured_w_filtering = capsys.readouterr()[-1].strip('\n')
|
|
|
|
output = captured_w_filtering.split('\n')
|
|
|
|
# Make sure the filtered frames aren't the same as the unfiltered ones.
|
|
assert output[:-1] != captured_wo_filtering.split('\n')[:-1]
|
|
# Remove the origin frame lines.
|
|
assert output[3:-1] == expected[:-1]
|
|
# Python 2.7 doesn't give the full exception name, so we compensate.
|
|
assert output[-1].endswith(expected[-1])
|
|
|
|
|
|
def test_lex_exception():
|
|
""" Ensure tokenize throws a fit on a partial input """
|
|
with peoi(): tokenize("(foo")
|
|
with peoi(): tokenize("{foo bar")
|
|
with peoi(): tokenize("(defn foo [bar]")
|
|
with peoi(): tokenize("(foo \"bar")
|
|
|
|
|
|
def test_unbalanced_exception():
|
|
"""Ensure the tokenization fails on unbalanced expressions"""
|
|
with lexe(): tokenize("(bar))")
|
|
with lexe(): tokenize("(baz [quux]])")
|
|
|
|
|
|
def test_lex_single_quote_err():
|
|
"Ensure tokenizing \"' \" throws a LexException that can be stringified"
|
|
# https://github.com/hylang/hy/issues/1252
|
|
with lexe() as execinfo:
|
|
tokenize("' ")
|
|
check_ex(execinfo, [
|
|
' File "<string>", line 1\n',
|
|
" '\n",
|
|
' ^\n',
|
|
'LexException: Could not identify the next token.\n'])
|
|
|
|
|
|
def test_lex_expression_symbols():
|
|
""" Make sure that expressions produce symbols """
|
|
objs = tokenize("(foo bar)")
|
|
assert objs == [HyExpression([HySymbol("foo"), HySymbol("bar")])]
|
|
|
|
|
|
def test_lex_expression_strings():
|
|
""" Test that expressions can produce strings """
|
|
objs = tokenize("(foo \"bar\")")
|
|
assert objs == [HyExpression([HySymbol("foo"), HyString("bar")])]
|
|
|
|
|
|
def test_lex_expression_integer():
|
|
""" Make sure expressions can produce integers """
|
|
objs = tokenize("(foo 2)")
|
|
assert objs == [HyExpression([HySymbol("foo"), HyInteger(2)])]
|
|
|
|
|
|
def test_lex_symbols():
|
|
""" Make sure that symbols are valid expressions"""
|
|
objs = tokenize("foo ")
|
|
assert objs == [HySymbol("foo")]
|
|
|
|
|
|
def test_lex_strings():
|
|
""" Make sure that strings are valid expressions"""
|
|
objs = tokenize('"foo"')
|
|
assert objs == [HyString("foo")]
|
|
# Make sure backslash-escaped newlines work (see issue #831)
|
|
objs = tokenize(r"""
|
|
"a\
|
|
bc"
|
|
""")
|
|
assert objs == [HyString("abc")]
|
|
|
|
|
|
def test_lex_strings_exception():
|
|
""" Make sure tokenize throws when codec can't decode some bytes"""
|
|
with lexe() as execinfo:
|
|
tokenize('\"\\x8\"')
|
|
check_ex(execinfo, [
|
|
' File "<string>", line 1\n',
|
|
' "\\x8"\n',
|
|
' ^\n',
|
|
'LexException: Can\'t convert "\\x8" to a HyString\n'])
|
|
|
|
|
|
def test_lex_bracket_strings():
|
|
|
|
objs = tokenize("#[my delim[hello world]my delim]")
|
|
assert objs == [HyString("hello world")]
|
|
assert objs[0].brackets == "my delim"
|
|
|
|
objs = tokenize("#[[squid]]")
|
|
assert objs == [HyString("squid")]
|
|
assert objs[0].brackets == ""
|
|
|
|
|
|
def test_lex_integers():
|
|
""" Make sure that integers are valid expressions"""
|
|
objs = tokenize("42 ")
|
|
assert objs == [HyInteger(42)]
|
|
|
|
|
|
def test_lex_fractions():
|
|
""" Make sure that fractions are valid expressions"""
|
|
objs = tokenize("1/2")
|
|
assert objs == [HyExpression([HySymbol("fraction"), HyInteger(1),
|
|
HyInteger(2)])]
|
|
|
|
|
|
def test_lex_expression_float():
|
|
""" Make sure expressions can produce floats """
|
|
objs = tokenize("(foo 2.)")
|
|
assert objs == [HyExpression([HySymbol("foo"), HyFloat(2.)])]
|
|
objs = tokenize("(foo -0.5)")
|
|
assert objs == [HyExpression([HySymbol("foo"), HyFloat(-0.5)])]
|
|
objs = tokenize("(foo 1.e7)")
|
|
assert objs == [HyExpression([HySymbol("foo"), HyFloat(1.e7)])]
|
|
|
|
|
|
def test_lex_big_float():
|
|
# https://github.com/hylang/hy/issues/1448
|
|
assert tokenize("1e900") == [HyFloat(1e900)]
|
|
assert tokenize("1e900-1e900j") == [HyComplex(1e900, -1e900)]
|
|
|
|
|
|
def test_lex_nan_and_inf():
|
|
|
|
assert isnan(tokenize("NaN")[0])
|
|
assert tokenize("Nan") == [HySymbol("Nan")]
|
|
assert tokenize("nan") == [HySymbol("nan")]
|
|
assert tokenize("NAN") == [HySymbol("NAN")]
|
|
|
|
assert tokenize("Inf") == [HyFloat(float("inf"))]
|
|
assert tokenize("inf") == [HySymbol("inf")]
|
|
assert tokenize("INF") == [HySymbol("INF")]
|
|
|
|
assert tokenize("-Inf") == [HyFloat(float("-inf"))]
|
|
assert tokenize("-inf") == [HySymbol("-inf")]
|
|
assert tokenize("-INF") == [HySymbol("-INF")]
|
|
|
|
|
|
def test_lex_expression_complex():
|
|
""" Make sure expressions can produce complex """
|
|
|
|
def t(x): return tokenize("(foo {})".format(x))
|
|
|
|
def f(x): return [HyExpression([HySymbol("foo"), x])]
|
|
|
|
assert t("2.j") == f(HyComplex(2.j))
|
|
assert t("-0.5j") == f(HyComplex(-0.5j))
|
|
assert t("1.e7j") == f(HyComplex(1e7j))
|
|
assert t("j") == f(HySymbol("j"))
|
|
assert isnan(t("NaNj")[0][1].imag)
|
|
assert t("nanj") == f(HySymbol("nanj"))
|
|
assert t("Inf+Infj") == f(HyComplex(complex(float("inf"), float("inf"))))
|
|
assert t("Inf-Infj") == f(HyComplex(complex(float("inf"), float("-inf"))))
|
|
assert t("Inf-INFj") == f(HySymbol("Inf-INFj"))
|
|
|
|
|
|
def test_lex_digit_separators():
|
|
|
|
assert tokenize("1_000_000") == [HyInteger(1000000)]
|
|
assert tokenize("1,000,000") == [HyInteger(1000000)]
|
|
assert tokenize("1,000_000") == [HyInteger(1000000)]
|
|
assert tokenize("1_000,000") == [HyInteger(1000000)]
|
|
|
|
assert tokenize("0x_af") == [HyInteger(0xaf)]
|
|
assert tokenize("0x,af") == [HyInteger(0xaf)]
|
|
assert tokenize("0b_010") == [HyInteger(0b010)]
|
|
assert tokenize("0b,010") == [HyInteger(0b010)]
|
|
assert tokenize("0o_373") == [HyInteger(0o373)]
|
|
assert tokenize("0o,373") == [HyInteger(0o373)]
|
|
|
|
assert tokenize('1_2.3,4') == [HyFloat(12.34)]
|
|
assert tokenize('1_2e3,4') == [HyFloat(12e34)]
|
|
assert (tokenize("1,2/3_4") ==
|
|
[HyExpression([HySymbol("fraction"),
|
|
HyInteger(12), HyInteger(34)])])
|
|
assert tokenize("1,0_00j") == [HyComplex(1000j)]
|
|
|
|
assert tokenize("1,,,,___,____,,__,,2__,,,__") == [HyInteger(12)]
|
|
assert (tokenize("_1,,,,___,____,,__,,2__,,,__") ==
|
|
[HySymbol("_1,,,,___,____,,__,,2__,,,__")])
|
|
assert (tokenize("1,,,,___,____,,__,,2__,q,__") ==
|
|
[HySymbol("1,,,,___,____,,__,,2__,q,__")])
|
|
|
|
|
|
def test_lex_bad_attrs():
|
|
with lexe() as execinfo:
|
|
tokenize("1.foo")
|
|
check_ex(execinfo, [
|
|
' File "<string>", line 1\n',
|
|
' 1.foo\n',
|
|
' ^\n',
|
|
'LexException: Cannot access attribute on anything other'
|
|
' than a name (in order to get attributes of expressions,'
|
|
' use `(. <expression> <attr>)` or `(.<attr> <expression>)`)\n'])
|
|
|
|
with lexe(): tokenize("0.foo")
|
|
with lexe(): tokenize("1.5.foo")
|
|
with lexe(): tokenize("1e3.foo")
|
|
with lexe(): tokenize("5j.foo")
|
|
with lexe(): tokenize("3+5j.foo")
|
|
with lexe(): tokenize("3.1+5.1j.foo")
|
|
assert tokenize("j.foo")
|
|
with lexe(): tokenize("3/4.foo")
|
|
assert tokenize("a/1.foo")
|
|
assert tokenize("1/a.foo")
|
|
with lexe(): tokenize(":hello.foo")
|
|
|
|
|
|
def test_lex_column_counting():
|
|
entry = tokenize("(foo (one two))")[0]
|
|
assert entry.start_line == 1
|
|
assert entry.start_column == 1
|
|
assert entry.end_line == 1
|
|
assert entry.end_column == 15
|
|
|
|
symbol = entry[0]
|
|
assert symbol.start_line == 1
|
|
assert symbol.start_column == 2
|
|
assert symbol.end_line == 1
|
|
assert symbol.end_column == 4
|
|
|
|
inner_expr = entry[1]
|
|
assert inner_expr.start_line == 1
|
|
assert inner_expr.start_column == 6
|
|
assert inner_expr.end_line == 1
|
|
assert inner_expr.end_column == 14
|
|
|
|
|
|
def test_lex_column_counting_with_literal_newline():
|
|
string, symbol = tokenize('"apple\nblueberry" abc')
|
|
|
|
assert string.start_line == 1
|
|
assert string.start_column == 1
|
|
assert string.end_line == 2
|
|
assert string.end_column == 10
|
|
|
|
assert symbol.start_line == 2
|
|
assert symbol.start_column == 12
|
|
assert symbol.end_line == 2
|
|
assert symbol.end_column == 14
|
|
|
|
|
|
def test_lex_line_counting_multi():
|
|
""" Make sure we can do multi-line tokenization """
|
|
entries = tokenize("""
|
|
(foo (one two))
|
|
(foo bar)
|
|
""")
|
|
|
|
entry = entries[0]
|
|
|
|
assert entry.start_line == 2
|
|
assert entry.start_column == 1
|
|
|
|
assert entry.end_line == 2
|
|
assert entry.end_column == 15
|
|
|
|
entry = entries[1]
|
|
assert entry.start_line == 3
|
|
assert entry.start_column == 1
|
|
|
|
assert entry.end_line == 3
|
|
assert entry.end_column == 9
|
|
|
|
|
|
def test_lex_line_counting_multi_inner():
|
|
""" Make sure we can do multi-line tokenization (inner) """
|
|
entry = tokenize("""(foo
|
|
bar)""")[0]
|
|
inner = entry[0]
|
|
|
|
assert inner.start_line == 1
|
|
assert inner.start_column == 2
|
|
|
|
inner = entry[1]
|
|
|
|
assert inner.start_line == 2
|
|
assert inner.start_column == 5
|
|
|
|
|
|
def test_dicts():
|
|
""" Ensure that we can tokenize a dict. """
|
|
objs = tokenize("{foo bar bar baz}")
|
|
assert objs == [HyDict(["foo", "bar", "bar", "baz"])]
|
|
|
|
objs = tokenize("(bar {foo bar bar baz})")
|
|
assert objs == [HyExpression([HySymbol("bar"),
|
|
HyDict(["foo", "bar",
|
|
"bar", "baz"])])]
|
|
|
|
objs = tokenize("{(foo bar) (baz quux)}")
|
|
assert objs == [HyDict([
|
|
HyExpression([HySymbol("foo"), HySymbol("bar")]),
|
|
HyExpression([HySymbol("baz"), HySymbol("quux")])
|
|
])]
|
|
|
|
|
|
def test_sets():
|
|
""" Ensure that we can tokenize a set. """
|
|
objs = tokenize("#{1 2}")
|
|
assert objs == [HySet([HyInteger(1), HyInteger(2)])]
|
|
objs = tokenize("(bar #{foo bar baz})")
|
|
assert objs == [HyExpression([HySymbol("bar"),
|
|
HySet(["foo", "bar", "baz"])])]
|
|
|
|
objs = tokenize("#{(foo bar) (baz quux)}")
|
|
assert objs == [HySet([
|
|
HyExpression([HySymbol("foo"), HySymbol("bar")]),
|
|
HyExpression([HySymbol("baz"), HySymbol("quux")])
|
|
])]
|
|
|
|
# Duplicate items in a literal set should be okay (and should
|
|
# be preserved).
|
|
objs = tokenize("#{1 2 1 1 2 1}")
|
|
assert objs == [HySet([HyInteger(n) for n in [1, 2, 1, 1, 2, 1]])]
|
|
assert len(objs[0]) == 6
|
|
|
|
# https://github.com/hylang/hy/issues/1120
|
|
objs = tokenize("#{a 1}")
|
|
assert objs == [HySet([HySymbol("a"), HyInteger(1)])]
|
|
|
|
|
|
def test_nospace():
|
|
""" Ensure we can tokenize without spaces if we have to """
|
|
entry = tokenize("(foo(one two))")[0]
|
|
|
|
assert entry.start_line == 1
|
|
assert entry.start_column == 1
|
|
|
|
assert entry.end_line == 1
|
|
assert entry.end_column == 14
|
|
|
|
entry = entry[1]
|
|
assert entry.start_line == 1
|
|
assert entry.start_column == 5
|
|
|
|
assert entry.end_line == 1
|
|
assert entry.end_column == 13
|
|
|
|
|
|
def test_escapes():
|
|
""" Ensure we can escape things """
|
|
entry = tokenize(r"""(foo "foo\n")""")[0]
|
|
assert entry[1] == "foo\n"
|
|
|
|
entry = tokenize(r"""(foo r"foo\s")""")[0]
|
|
assert entry[1] == r"foo\s"
|
|
|
|
|
|
def test_unicode_escapes():
|
|
"""Ensure unicode escapes are handled correctly"""
|
|
s = r'"a\xac\u1234\u20ac\U00008000"'
|
|
assert len(s) == 29
|
|
entry = tokenize(s)[0]
|
|
assert len(entry) == 5
|
|
assert [ord(x) for x in entry] == [97, 172, 4660, 8364, 32768]
|
|
|
|
|
|
def test_complex():
|
|
"""Ensure we tokenize complex numbers properly"""
|
|
# This is a regression test for #143
|
|
entry = tokenize("(1j)")[0][0]
|
|
assert entry == HyComplex("1.0j")
|
|
entry = tokenize("(j)")[0][0]
|
|
assert entry == HySymbol("j")
|
|
|
|
|
|
def test_tag_macro():
|
|
"""Ensure tag macros are handled properly"""
|
|
entry = tokenize("#^()")
|
|
assert entry[0][0] == HySymbol("dispatch-tag-macro")
|
|
assert entry[0][1] == HyString("^")
|
|
assert len(entry[0]) == 3
|
|
|
|
|
|
def test_lex_comment_382():
|
|
"""Ensure that we can tokenize sources with a comment at the end"""
|
|
entry = tokenize("foo ;bar\n;baz")
|
|
assert entry == [HySymbol("foo")]
|
|
|
|
|
|
def test_discard():
|
|
"""Check that discarded terms are removed properly."""
|
|
# empty
|
|
assert tokenize("") == []
|
|
# single
|
|
assert tokenize("#_1") == []
|
|
# multiple
|
|
assert tokenize("#_1 #_2") == []
|
|
assert tokenize("#_1 #_2 #_3") == []
|
|
# nested discard
|
|
assert tokenize("#_ #_1 2") == []
|
|
assert tokenize("#_ #_ #_1 2 3") == []
|
|
# trailing
|
|
assert tokenize("0") == [0]
|
|
assert tokenize("0 #_1") == [0]
|
|
assert tokenize("0 #_1 #_2") == [0]
|
|
# leading
|
|
assert tokenize("2") == [2]
|
|
assert tokenize("#_1 2") == [2]
|
|
assert tokenize("#_0 #_1 2") == [2]
|
|
assert tokenize("#_ #_0 1 2") == [2]
|
|
# both
|
|
assert tokenize("#_1 2 #_3") == [2]
|
|
assert tokenize("#_0 #_1 2 #_ #_3 4") == [2]
|
|
# inside
|
|
assert tokenize("0 #_1 2") == [0, 2]
|
|
assert tokenize("0 #_1 #_2 3") == [0, 3]
|
|
assert tokenize("0 #_ #_1 2 3") == [0, 3]
|
|
# in HyList
|
|
assert tokenize("[]") == [HyList([])]
|
|
assert tokenize("[#_1]") == [HyList([])]
|
|
assert tokenize("[#_1 #_2]") == [HyList([])]
|
|
assert tokenize("[#_ #_1 2]") == [HyList([])]
|
|
assert tokenize("[0]") == [HyList([HyInteger(0)])]
|
|
assert tokenize("[0 #_1]") == [HyList([HyInteger(0)])]
|
|
assert tokenize("[0 #_1 #_2]") == [HyList([HyInteger(0)])]
|
|
assert tokenize("[2]") == [HyList([HyInteger(2)])]
|
|
assert tokenize("[#_1 2]") == [HyList([HyInteger(2)])]
|
|
assert tokenize("[#_0 #_1 2]") == [HyList([HyInteger(2)])]
|
|
assert tokenize("[#_ #_0 1 2]") == [HyList([HyInteger(2)])]
|
|
# in HySet
|
|
assert tokenize("#{}") == [HySet()]
|
|
assert tokenize("#{#_1}") == [HySet()]
|
|
assert tokenize("#{0 #_1}") == [HySet([HyInteger(0)])]
|
|
assert tokenize("#{#_1 0}") == [HySet([HyInteger(0)])]
|
|
# in HyDict
|
|
assert tokenize("{}") == [HyDict()]
|
|
assert tokenize("{#_1}") == [HyDict()]
|
|
assert tokenize("{#_0 1 2}") == [HyDict([HyInteger(1), HyInteger(2)])]
|
|
assert tokenize("{1 #_0 2}") == [HyDict([HyInteger(1), HyInteger(2)])]
|
|
assert tokenize("{1 2 #_0}") == [HyDict([HyInteger(1), HyInteger(2)])]
|
|
# in HyExpression
|
|
assert tokenize("()") == [HyExpression()]
|
|
assert tokenize("(#_foo)") == [HyExpression()]
|
|
assert tokenize("(#_foo bar)") == [HyExpression([HySymbol("bar")])]
|
|
assert tokenize("(foo #_bar)") == [HyExpression([HySymbol("foo")])]
|
|
assert tokenize("(foo :bar 1)") == [HyExpression([HySymbol("foo"), HyKeyword("bar"), HyInteger(1)])]
|
|
assert tokenize("(foo #_:bar 1)") == [HyExpression([HySymbol("foo"), HyInteger(1)])]
|
|
assert tokenize("(foo :bar #_1)") == [HyExpression([HySymbol("foo"), HyKeyword("bar")])]
|
|
# discard term with nesting
|
|
assert tokenize("[1 2 #_[a b c [d e [f g] h]] 3 4]") == [
|
|
HyList([HyInteger(1), HyInteger(2), HyInteger(3), HyInteger(4)])
|
|
]
|
|
# discard with other prefix syntax
|
|
assert tokenize("a #_'b c") == [HySymbol("a"), HySymbol("c")]
|
|
assert tokenize("a '#_b c") == [HySymbol("a"), HyExpression([HySymbol("quote"), HySymbol("c")])]
|
|
assert tokenize("a '#_b #_c d") == [HySymbol("a"), HyExpression([HySymbol("quote"), HySymbol("d")])]
|
|
assert tokenize("a '#_ #_b c d") == [HySymbol("a"), HyExpression([HySymbol("quote"), HySymbol("d")])]
|
|
|
|
|
|
def test_lex_exception_filtering(capsys):
|
|
"""Confirm that the exception filtering works for lexer errors."""
|
|
|
|
# First, test for PrematureEndOfInput
|
|
with peoi() as execinfo:
|
|
tokenize(" \n (foo\n \n")
|
|
check_trace_output(capsys, execinfo, [
|
|
' File "<string>", line 2',
|
|
' (foo',
|
|
' ^',
|
|
'PrematureEndOfInput: Premature end of input'])
|
|
|
|
# Now, for a generic LexException
|
|
with lexe() as execinfo:
|
|
tokenize(" \n\n 1.foo ")
|
|
check_trace_output(capsys, execinfo, [
|
|
' File "<string>", line 3',
|
|
' 1.foo',
|
|
' ^',
|
|
'LexException: Cannot access attribute on anything other'
|
|
' than a name (in order to get attributes of expressions,'
|
|
' use `(. <expression> <attr>)` or `(.<attr> <expression>)`)'])
|