hy/tests/lex/test_lex.py

408 lines
12 KiB
Python
Raw Normal View History

2013-03-18 15:27:14 +01:00
# Copyright (c) 2013 Paul Tagliamonte <paultag@debian.org>
# Copyright (c) 2014 Nicolas Dandrimont <nicolas.dandrimont@crans.org>
2013-03-03 02:24:32 +01:00
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
2013-03-01 04:37:23 +01:00
from hy.models.expression import HyExpression
2013-03-03 01:28:10 +01:00
from hy.models.integer import HyInteger
2013-04-11 09:54:59 +02:00
from hy.models.float import HyFloat
from hy.models.complex import HyComplex
2013-03-01 04:37:23 +01:00
from hy.models.symbol import HySymbol
2013-03-03 00:40:00 +01:00
from hy.models.string import HyString
2013-03-07 02:59:45 +01:00
from hy.models.dict import HyDict
from hy.models.list import HyList
2015-06-26 23:47:35 +02:00
from hy.models.set import HySet
from hy.models.cons import HyCons
2013-03-01 04:27:20 +01:00
from hy.lex import LexException, PrematureEndOfInput, tokenize
2013-03-03 01:28:10 +01:00
2013-03-01 04:27:20 +01:00
2013-03-03 01:48:29 +01:00
def test_lex_exception():
    """Ensure tokenize raises PrematureEndOfInput on partial input.

    Every source below opens an expression, dict, or string literal
    that is never closed.
    """
    partial_sources = [
        "(foo",
        "{foo bar",
        "(defn foo [bar]",
        "(foo \"bar",
    ]
    for source in partial_sources:
        try:
            tokenize(source)
        except PrematureEndOfInput:
            continue
        # An explicit raise (instead of ``assert True is False``) keeps the
        # check alive under ``python -O`` and names the offending input.
        raise AssertionError(
            "tokenize(%r) did not raise PrematureEndOfInput" % (source,))
def test_unbalanced_exception():
    """Ensure tokenize raises LexException on unbalanced expressions.

    Every source below contains a closing delimiter with no matching
    opener.
    """
    for source in ("(bar))", "(baz [quux]])"):
        try:
            tokenize(source)
        except LexException:
            continue
        # An explicit raise (instead of ``assert True is False``) keeps the
        # check alive under ``python -O`` and names the offending input.
        raise AssertionError(
            "tokenize(%r) did not raise LexException" % (source,))
2013-03-03 01:48:29 +01:00
2013-03-03 00:40:00 +01:00
def test_lex_expression_symbols():
    """A parenthesized pair of bare words lexes to an expression of symbols."""
    expected = HyExpression([HySymbol("foo"), HySymbol("bar")])
    assert tokenize("(foo bar)") == [expected]
2013-03-03 00:40:00 +01:00
2013-03-03 01:41:55 +01:00
2013-03-03 00:40:00 +01:00
def test_lex_expression_strings():
    """A quoted argument inside an expression lexes to a HyString."""
    expected = HyExpression([HySymbol("foo"), HyString("bar")])
    assert tokenize("(foo \"bar\")") == [expected]
2013-03-03 01:28:10 +01:00
2013-03-03 01:41:55 +01:00
2013-03-03 01:28:10 +01:00
def test_lex_expression_integer():
    """An integer argument inside an expression lexes to a HyInteger."""
    expected = HyExpression([HySymbol("foo"), HyInteger(2)])
    assert tokenize("(foo 2)") == [expected]
2013-03-03 01:41:55 +01:00
def test_lex_symbols():
    """A bare word at top level lexes to a single HySymbol."""
    assert tokenize("foo ") == [HySymbol("foo")]
2013-04-11 08:26:56 +02:00
def test_lex_strings():
    """A string literal at top level lexes to a single HyString."""
    assert tokenize('"foo"') == [HyString("foo")]

    # Regression check for issue #831: a backslash placed directly before
    # a newline inside a string literal is dropped together with that
    # newline, so "a" and "bc" end up joined.
    continued = tokenize(r"""
"a\
bc"
""")
    assert continued == [HyString("abc")]
def test_lex_integers():
    """An integer literal at top level lexes to a single HyInteger."""
    assert tokenize("42 ") == [HyInteger(42)]
def test_lex_fractions():
    """The literal 1/2 lexes to a (fraction 1 2) expression."""
    expected = HyExpression([
        HySymbol("fraction"),
        HyInteger(1),
        HyInteger(2),
    ])
    assert tokenize("1/2") == [expected]
2013-04-11 09:54:59 +02:00
def test_lex_expression_float():
    """Float literals inside an expression lex to HyFloat values."""
    cases = [
        ("(foo 2.)", 2.),
        ("(foo -0.5)", -0.5),
        ("(foo 1.e7)", 1.e7),
    ]
    for source, value in cases:
        parsed = tokenize(source)
        assert parsed == [HyExpression([HySymbol("foo"), HyFloat(value)])]
2013-04-11 09:54:59 +02:00
def test_lex_expression_complex():
    """Complex literals inside an expression lex to HyComplex values."""
    cases = [
        ("(foo 2.j)", 2.j),
        ("(foo -0.5j)", -0.5j),
        ("(foo 1.e7j)", 1.e7j),
    ]
    for source, value in cases:
        parsed = tokenize(source)
        assert parsed == [HyExpression([HySymbol("foo"), HyComplex(value)])]

    # A bare ``j`` is expected to lex as a symbol, not a complex number.
    parsed = tokenize("(foo j)")
    assert parsed == [HyExpression([HySymbol("foo"), HySymbol("j")])]
2013-04-11 09:54:59 +02:00
2013-03-03 01:41:55 +01:00
def test_lex_line_counting():
    """Check the start/end line and column recorded on nested expressions."""
    outer = tokenize("(foo (one two))")[0]
    assert (outer.start_line, outer.start_column) == (1, 1)
    assert (outer.end_line, outer.end_column) == (1, 15)

    # The second element of the outer form is the nested "(one two)".
    nested = outer[1]
    assert (nested.start_line, nested.start_column) == (1, 6)
    assert (nested.end_line, nested.end_column) == (1, 14)
def test_lex_line_counting_multi():
    """Check positions of two expressions that sit on separate lines."""
    # The source begins with a newline, so the first form is on line 2.
    forms = tokenize("\n(foo (one two))\n(foo bar)\n")

    first = forms[0]
    assert (first.start_line, first.start_column) == (2, 1)
    assert (first.end_line, first.end_column) == (2, 15)

    second = forms[1]
    assert (second.start_line, second.start_column) == (3, 1)
    assert (second.end_line, second.end_column) == (3, 9)
2013-03-03 20:03:59 +01:00
def test_lex_line_counting_multi_inner():
    """Check positions of symbols inside an expression spanning two lines."""
    # The second line is indented by four spaces so that ``bar`` begins at
    # column 5, as asserted below. The newline and indentation are written
    # as an explicit escape so reformatting cannot strip them.
    entry = tokenize("(foo\n    bar)")[0]

    head = entry[0]
    assert head.start_line == 1
    assert head.start_column == 2

    second = entry[1]
    assert second.start_line == 2
    assert second.start_column == 5
2013-03-07 02:59:45 +01:00
2013-03-08 01:23:11 +01:00
def test_dicts():
    """Ensure dict literals tokenize to HyDict, alone or nested."""
    # A dict on its own.
    parsed = tokenize("{foo bar bar baz}")
    assert parsed == [HyDict(["foo", "bar", "bar", "baz"])]

    # A dict as an argument inside an expression.
    parsed = tokenize("(bar {foo bar bar baz})")
    inner = HyDict(["foo", "bar", "bar", "baz"])
    assert parsed == [HyExpression([HySymbol("bar"), inner])]

    # A dict whose items are themselves expressions.
    parsed = tokenize("{(foo bar) (baz quux)}")
    items = [
        HyExpression([HySymbol("foo"), HySymbol("bar")]),
        HyExpression([HySymbol("baz"), HySymbol("quux")]),
    ]
    assert parsed == [HyDict(items)]
2013-03-08 01:23:11 +01:00
2015-06-26 23:47:35 +02:00
def test_sets():
    """Ensure set literals tokenize to HySet, alone or nested."""
    # A set on its own.
    parsed = tokenize("#{1 2}")
    assert parsed == [HySet([HyInteger(1), HyInteger(2)])]

    # A set as an argument inside an expression.
    parsed = tokenize("(bar #{foo bar baz})")
    inner = HySet(["foo", "bar", "baz"])
    assert parsed == [HyExpression([HySymbol("bar"), inner])]

    # A set whose items are themselves expressions.
    parsed = tokenize("#{(foo bar) (baz quux)}")
    items = [
        HyExpression([HySymbol("foo"), HySymbol("bar")]),
        HyExpression([HySymbol("baz"), HySymbol("quux")]),
    ]
    assert parsed == [HySet(items)]

    # Duplicate items in a literal set should be okay (and should
    # be preserved).
    parsed = tokenize("#{1 2 1 1 2 1}")
    assert parsed == [HySet([HyInteger(n) for n in [1, 2, 1, 1, 2, 1]])]
    assert len(parsed[0]) == 6

    # https://github.com/hylang/hy/issues/1120
    parsed = tokenize("#{a 1}")
    assert parsed == [HySet([HySymbol("a"), HyInteger(1)])]
2015-06-26 23:47:35 +02:00
2013-03-08 01:23:11 +01:00
def test_nospace():
    """Check positions when a nested form directly follows a symbol."""
    outer = tokenize("(foo(one two))")[0]
    assert (outer.start_line, outer.start_column) == (1, 1)
    assert (outer.end_line, outer.end_column) == (1, 14)

    # The second element of the outer form is the nested "(one two)".
    nested = outer[1]
    assert (nested.start_line, nested.start_column) == (1, 5)
    assert (nested.end_line, nested.end_column) == (1, 13)
2013-04-02 04:07:05 +02:00
def test_escapes():
    """Ensure escape sequences inside string literals are handled."""
    # The two-character sequence backslash-n becomes a real newline.
    newline_form = tokenize("(foo \"foo\\n\")")[0]
    assert newline_form[1] == "foo\n"

    # Backslash-s comes through with its backslash intact.
    other_form = tokenize("(foo \"foo\\s\")")[0]
    assert other_form[1] == "foo\\s"
def test_unicode_escapes():
    """Ensure hex and unicode escapes each decode to a single character."""
    source = r'"a\xac\u1234\u20ac\U00008000"'
    # The raw literal keeps the escapes unexpanded on the Python side,
    # so the lexer receives them as written.
    assert len(source) == 29

    decoded = tokenize(source)[0]
    assert len(decoded) == 5
    assert [ord(ch) for ch in decoded] == [97, 172, 4660, 8364, 32768]
2013-04-02 04:07:05 +02:00
def test_hashbang():
    """Ensure a line starting with #! produces no tokens."""
    assert tokenize("#!this is a comment\n") == []
def test_complex():
    """Ensure we tokenize complex numbers properly"""
    # This is a regression test for #143
    head = tokenize("(1j)")[0][0]
    assert head == HyComplex("1.0j")

    # A bare ``j`` is expected to stay a symbol.
    head = tokenize("(j)")[0][0]
    assert head == HySymbol("j")
def test_reader_macro():
    """Ensure reader macros are handled properly"""
    forms = tokenize("#^()")
    # The first form is a three-element dispatch call; only its first
    # two elements are pinned here.
    assert forms[0][0] == HySymbol("dispatch_reader_macro")
    assert forms[0][1] == HyString("^")
    assert len(forms[0]) == 3
2014-01-02 03:13:49 +01:00
def test_lex_comment_382():
    """Ensure a source whose last line is a comment still tokenizes."""
    # The final comment has no trailing newline.
    assert tokenize("foo ;bar\n;baz") == [HySymbol("foo")]
def test_lex_mangling_star():
    """Ensure that mangling starred identifiers works according to plan"""
    # (source text, expected symbol name)
    cases = [
        ("*foo*", "FOO"),
        ("*", "*"),
        ("*foo", "*foo"),
    ]
    for source, mangled in cases:
        assert tokenize(source) == [HySymbol(mangled)]
def test_lex_mangling_hyphen():
    """Ensure that hyphens get translated to underscores during mangling"""
    # (source text, expected symbol name); a lone hyphen is left alone.
    cases = [
        ("foo-bar", "foo_bar"),
        ("-", "-"),
    ]
    for source, mangled in cases:
        assert tokenize(source) == [HySymbol(mangled)]
def test_lex_mangling_qmark():
    """Ensure that identifiers ending with a question mark get mangled ok"""
    # (source text, expected symbol name). The dotted cases show each
    # dot-separated part being mangled on its own.
    cases = [
        ("foo?", "is_foo"),
        ("?", "?"),
        ("im?foo", "im?foo"),
        (".foo?", ".is_foo"),
        ("foo.bar?", "foo.is_bar"),
        ("foo?.bar", "is_foo.bar"),
        (".foo?.bar.baz?", ".is_foo.bar.is_baz"),
    ]
    for source, mangled in cases:
        assert tokenize(source) == [HySymbol(mangled)]
def test_lex_mangling_bang():
    """Ensure that identifiers ending with a bang get mangled ok"""
    # (source text, expected symbol name). The dotted cases show each
    # dot-separated part being mangled on its own.
    cases = [
        ("foo!", "foo_bang"),
        ("!", "!"),
        ("im!foo", "im!foo"),
        (".foo!", ".foo_bang"),
        ("foo.bar!", "foo.bar_bang"),
        ("foo!.bar", "foo_bang.bar"),
        (".foo!.bar.baz!", ".foo_bang.bar.baz_bang"),
    ]
    for source, mangled in cases:
        assert tokenize(source) == [HySymbol(mangled)]
def test_unmangle():
    """Check hy_symbol_unmangle against a table of mangled names."""
    import sys

    # The function is fetched through sys.modules rather than a plain
    # import. NOTE(review): presumably the attribute ``hy.lex.parser``
    # does not resolve to the module itself -- confirm before simplifying.
    unmangle = sys.modules["hy.lex.parser"].hy_symbol_unmangle

    # (mangled name, expected unmangled name)
    expectations = [
        ("FOO", "*foo*"),
        ("<", "<"),
        ("FOOa", "FOOa"),
        ("foo_bar", "foo-bar"),
        ("_", "_"),
        ("is_foo", "foo?"),
        ("is_", "is-"),
        ("foo_bang", "foo!"),
        ("_bang", "-bang"),
    ]
    for mangled, readable in expectations:
        assert unmangle(mangled) == readable
def test_simple_cons():
    """Check that a dotted pair tokenizes to a single HyCons."""
    pair = tokenize("(a . b)")[0]
    expected = HyCons(HySymbol("a"), HySymbol("b"))
    assert pair == expected
def test_dotted_list():
    """Check that dotted lists get tokenized correctly"""
    parsed = tokenize("(a b c . (d . e))")[0]

    # Build the expected chain from the innermost pair outwards:
    # (d . e), then c, b and a are each consed onto the front.
    expected = HyCons(HySymbol("d"), HySymbol("e"))
    for name in ("c", "b", "a"):
        expected = HyCons(HySymbol(name), expected)

    assert parsed == expected
def test_cons_list():
    """Check that cons of something and a list gets tokenized as a list.

    The result must have exactly the type of the collection after the
    dot, so the type checks use identity (``is``) rather than ``==`` or
    ``isinstance``, which would also accept subclasses.
    """
    entry = tokenize("(a . [])")[0]
    assert entry == HyList([HySymbol("a")])
    assert type(entry) is HyList

    entry = tokenize("(a . ())")[0]
    assert entry == HyExpression([HySymbol("a")])
    assert type(entry) is HyExpression

    entry = tokenize("(a b . {})")[0]
    assert entry == HyDict([HySymbol("a"), HySymbol("b")])
    assert type(entry) is HyDict