Remove handling of UCS-2

This commit is contained in:
Kodi Arfer 2019-05-20 15:48:29 -04:00
parent c255f0d03c
commit 7991c59480
2 changed files with 2 additions and 19 deletions

View File

@ -9,10 +9,6 @@ PY36 = sys.version_info >= (3, 6)
PY37 = sys.version_info >= (3, 7) PY37 = sys.version_info >= (3, 7)
PY38 = sys.version_info >= (3, 8) PY38 = sys.version_info >= (3, 8)
# The value of UCS4 indicates whether Unicode strings are stored as UCS-4.
# It is always true on Pythons >= 3.3, which use UCS-4 on all systems.
UCS4 = sys.maxunicode == 0x10FFFF
def reraise(exc_type, value, traceback=None): def reraise(exc_type, value, traceback=None):
try: try:

View File

@ -8,7 +8,7 @@ import re
import sys import sys
import unicodedata import unicodedata
from hy._compat import isidentifier, UCS4 from hy._compat import isidentifier
from hy.lex.exceptions import PrematureEndOfInput, LexException # NOQA from hy.lex.exceptions import PrematureEndOfInput, LexException # NOQA
from hy.models import HyExpression, HySymbol from hy.models import HyExpression, HySymbol
@ -135,7 +135,7 @@ def mangle(s):
else '{0}{1}{0}'.format(mangle_delim, else '{0}{1}{0}'.format(mangle_delim,
unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_') unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
or 'U{}'.format(unicode_char_to_hex(c))) or 'U{}'.format(unicode_char_to_hex(c)))
for c in unicode_to_ucs4iter(s)) for c in s)
s = leading_underscores + s s = leading_underscores + s
assert isidentifier(s) assert isidentifier(s)
@ -168,19 +168,6 @@ def unmangle(s):
return '-' * leading_underscores + s return '-' * leading_underscores + s
def unicode_to_ucs4iter(ustr):
    """Return an iterable of the whole Unicode characters in `ustr`.

    When the module-level `UCS4` flag is true, `ustr` is returned
    unchanged, since iterating over it already yields one character
    per element.  Otherwise a new list is returned in which every
    UTF-16 surrogate pair (a high surrogate, U+D800..U+DBFF, directly
    followed by a low surrogate, U+DC00..U+DFFF) is joined into a single
    two-unit element.

    Unpaired surrogates are kept as separate one-unit elements: a high
    surrogate at the very end of the string no longer raises IndexError,
    and a high surrogate followed by a non-surrogate is no longer glued
    to that unrelated character.
    """
    if UCS4:
        return ustr
    merged = []
    for unit in ustr:
        # Join `unit` onto the previous element only when that element is
        # a still-unpaired high surrogate and `unit` is a low surrogate.
        if (merged
                and len(merged[-1]) == 1
                and 0xD800 <= ord(merged[-1]) <= 0xDBFF
                and 0xDC00 <= ord(unit) <= 0xDFFF):
            merged[-1] += unit
        else:
            merged.append(unit)
    return merged
def read(from_file=sys.stdin, eof=""): def read(from_file=sys.stdin, eof=""):
"""Read from input and returns a tokenized string. """Read from input and returns a tokenized string.