diff --git a/hy/lex/parser.py b/hy/lex/parser.py index c4df2a5..27180c5 100755 --- a/hy/lex/parser.py +++ b/hy/lex/parser.py @@ -31,8 +31,11 @@ def set_boundaries(fun): ret.end_line = end.lineno ret.end_column = end.colno else: - ret.end_line = start.lineno - ret.end_column = start.colno + len(p[0].value) + v = p[0].value + ret.end_line = start.lineno + v.count('\n') + ret.end_column = (len(v) - v.rindex('\n') - 1 + if '\n' in v + else start.colno + len(v) - 1) return ret return wrapped diff --git a/hy/models.py b/hy/models.py index 478c691..ef51a26 100644 --- a/hy/models.py +++ b/hy/models.py @@ -33,6 +33,11 @@ class HyObject(object): """ Generic Hy Object model. This is helpful to inject things into all the Hy lexing Objects at once. + + The position properties (`start_line`, `end_line`, `start_column`, + `end_column`) are each 1-based and inclusive. For example, a symbol + `abc` starting at the first column would have `start_column` 1 and + `end_column` 3. """ __properties__ = ["module", "start_line", "end_line", "start_column", "end_column"] diff --git a/tests/test_lex.py b/tests/test_lex.py index 99658c6..304f69a 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -247,6 +247,12 @@ def test_lex_column_counting(): assert entry.end_line == 1 assert entry.end_column == 15 + symbol = entry[0] + assert symbol.start_line == 1 + assert symbol.start_column == 2 + assert symbol.end_line == 1 + assert symbol.end_column == 4 + inner_expr = entry[1] assert inner_expr.start_line == 1 assert inner_expr.start_column == 6 @@ -254,6 +260,20 @@ def test_lex_column_counting(): assert inner_expr.end_column == 14 +def test_lex_column_counting_with_literal_newline(): + string, symbol = tokenize('"apple\nblueberry" abc') + + assert string.start_line == 1 + assert string.start_column == 1 + assert string.end_line == 2 + assert string.end_column == 10 + + assert symbol.start_line == 2 + assert symbol.start_column == 12 + assert symbol.end_line == 2 + assert symbol.end_column == 14 + + def test_lex_line_counting_multi(): """ Make sure we can do multi-line tokenization """ entries = tokenize("""