diff --git a/hy/_compat.py b/hy/_compat.py index cbe0fad..518434c 100644 --- a/hy/_compat.py +++ b/hy/_compat.py @@ -9,10 +9,6 @@ PY36 = sys.version_info >= (3, 6) PY37 = sys.version_info >= (3, 7) PY38 = sys.version_info >= (3, 8) -# The value of UCS4 indicates whether Unicode strings are stored as UCS-4. -# It is always true on Pythons >= 3.3, which use USC-4 on all systems. -UCS4 = sys.maxunicode == 0x10FFFF - def reraise(exc_type, value, traceback=None): try: diff --git a/hy/lex/__init__.py b/hy/lex/__init__.py index b29709c..e2e5a26 100644 --- a/hy/lex/__init__.py +++ b/hy/lex/__init__.py @@ -8,7 +8,7 @@ import re import sys import unicodedata -from hy._compat import isidentifier, UCS4 +from hy._compat import isidentifier from hy.lex.exceptions import PrematureEndOfInput, LexException # NOQA from hy.models import HyExpression, HySymbol @@ -135,7 +135,7 @@ def mangle(s): else '{0}{1}{0}'.format(mangle_delim, unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_') or 'U{}'.format(unicode_char_to_hex(c))) - for c in unicode_to_ucs4iter(s)) + for c in s) s = leading_underscores + s assert isidentifier(s) @@ -168,19 +168,6 @@ def unmangle(s): return '-' * leading_underscores + s -def unicode_to_ucs4iter(ustr): - # Covert a unicode string to an iterable object, - # elements in the object are single USC-4 unicode characters - if UCS4: - return ustr - ucs4_list = list(ustr) - for i, u in enumerate(ucs4_list): - if 0xD7FF < ord(u) < 0xDC00: - ucs4_list[i] += ucs4_list[i + 1] - del ucs4_list[i + 1] - return ucs4_list - - def read(from_file=sys.stdin, eof=""): """Read from input and returns a tokenized string.