Merge pull request #1552 from waigx/fix/ucs2
Fix `mangle` for Pythons compiled with UCS-2
This commit is contained in:
commit
cce8b87483
@ -25,6 +25,10 @@ PY35 = sys.version_info >= (3, 5)
|
|||||||
PY36 = sys.version_info >= (3, 6)
|
PY36 = sys.version_info >= (3, 6)
|
||||||
PY37 = sys.version_info >= (3, 7)
|
PY37 = sys.version_info >= (3, 7)
|
||||||
|
|
||||||
|
# The value of UCS4 indicates whether Unicode strings are stored as UCS-4.
|
||||||
|
# It is always true on Pythons >= 3.3, which use UCS-4 on all systems.
|
||||||
|
UCS4 = sys.maxunicode == 0x10FFFF
|
||||||
|
|
||||||
str_type = str if PY3 else unicode # NOQA
|
str_type = str if PY3 else unicode # NOQA
|
||||||
bytes_type = bytes if PY3 else str # NOQA
|
bytes_type = bytes if PY3 else str # NOQA
|
||||||
long_type = int if PY3 else long # NOQA
|
long_type = int if PY3 else long # NOQA
|
||||||
|
@ -10,7 +10,7 @@ import string, re, unicodedata
|
|||||||
|
|
||||||
from rply import ParserGenerator
|
from rply import ParserGenerator
|
||||||
|
|
||||||
from hy._compat import PY3, str_type, isidentifier
|
from hy._compat import PY3, str_type, isidentifier, UCS4
|
||||||
from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression,
|
from hy.models import (HyBytes, HyComplex, HyCons, HyDict, HyExpression,
|
||||||
HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString,
|
HyFloat, HyInteger, HyKeyword, HyList, HySet, HyString,
|
||||||
HySymbol)
|
HySymbol)
|
||||||
@ -25,14 +25,28 @@ pg = ParserGenerator(
|
|||||||
|
|
||||||
mangle_delim = 'Δ' if PY3 else 'X'
|
mangle_delim = 'Δ' if PY3 else 'X'
|
||||||
|
|
||||||
|
def unicode_to_ucs4iter(ustr):
    """Return an iterable whose elements are whole Unicode characters.

    When `UCS4` is true, `ustr` is returned unchanged. Otherwise a list is
    returned in which every high surrogate (U+D800..U+DBFF) immediately
    followed by a low surrogate (U+DC00..U+DFFF) has been joined with it
    into a single two-unit string. Unpaired surrogates are kept as
    separate one-unit elements instead of raising or being glued to an
    unrelated neighbour.
    """
    if UCS4:
        return ustr
    chars = []
    for unit in ustr:
        # A low surrogate completes the element before it only when that
        # element is still a lone high surrogate.
        if (chars
                and 0xDC00 <= ord(unit) <= 0xDFFF
                and len(chars[-1]) == 1
                and 0xD800 <= ord(chars[-1]) <= 0xDBFF):
            chars[-1] += unit
        else:
            chars.append(unit)
    return chars
|
||||||
|
|
||||||
def mangle(s):
|
def mangle(s):
|
||||||
"""Stringify the argument and convert it to a valid Python identifier
|
"""Stringify the argument and convert it to a valid Python identifier
|
||||||
according to Hy's mangling rules."""
|
according to Hy's mangling rules."""
|
||||||
|
def unicode_char_to_hex(uchr):
    """Return the code point of `uchr` as lowercase hex, without a prefix.

    `uchr` is either a single character or a two-unit UTF-16 surrogate
    pair, which is decoded to the code point it encodes. The value is
    computed arithmetically rather than by trimming the `unicode-escape`
    form, because that form is not always `\\u`/`\\U` plus hex digits
    (for example `\\x00` or `\\n`).
    """
    if (len(uchr) == 2
            and 0xD800 <= ord(uchr[0]) <= 0xDBFF
            and 0xDC00 <= ord(uchr[1]) <= 0xDFFF):
        # Standard UTF-16 decoding of a high/low surrogate pair.
        high = ord(uchr[0]) - 0xD800
        low = ord(uchr[1]) - 0xDC00
        code_point = 0x10000 + (high << 10) + low
    else:
        code_point = ord(uchr)
    return '{:x}'.format(code_point)
|
||||||
|
|
||||||
assert s
|
assert s
|
||||||
|
|
||||||
s = str_type(s)
|
s = str_type(s)
|
||||||
|
|
||||||
s = s.replace("-", "_")
|
s = s.replace("-", "_")
|
||||||
s2 = s.lstrip('_')
|
s2 = s.lstrip('_')
|
||||||
leading_underscores = '_' * (len(s) - len(s2))
|
leading_underscores = '_' * (len(s) - len(s2))
|
||||||
@ -50,8 +64,8 @@ def mangle(s):
|
|||||||
# allowed at the start of an identifier.
|
# allowed at the start of an identifier.
|
||||||
else '{0}{1}{0}'.format(mangle_delim,
|
else '{0}{1}{0}'.format(mangle_delim,
|
||||||
unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
|
unicodedata.name(c, '').lower().replace('-', 'H').replace(' ', '_')
|
||||||
or 'U{:x}'.format(ord(c)))
|
or 'U{}'.format(unicode_char_to_hex(c)))
|
||||||
for c in s)
|
for c in unicode_to_ucs4iter(s))
|
||||||
|
|
||||||
s = leading_underscores + s
|
s = leading_underscores + s
|
||||||
assert isidentifier(s)
|
assert isidentifier(s)
|
||||||
|
Loading…
Reference in New Issue
Block a user