Skip to content

Commit e04fff8

Browse files
committed
Rewrite supports_lone_surrogates to feature-sniff.
1 parent 625303f commit e04fff8

File tree

1 file changed

+17
-8
lines changed

1 file changed

+17
-8
lines changed

html5lib/utils.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
import platform
43
from types import ModuleType
54

5+
from six import text_type
6+
67
try:
78
import xml.etree.cElementTree as default_etree
89
except ImportError:
@@ -15,13 +16,21 @@
1516

1617

1718
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
18-
# added to the below test. In general this would be any platform using
19-
# UTF-16 as its encoding of unicode strings, such as Jython. This is
20-
# because UTF-16 itself is based on the use of such surrogates, and
21-
# there is no mechanism to further escape such escapes.
22-
#
23-
# Otherwise we assume such support.
24-
supports_lone_surrogates = platform.python_implementation() != "Jython"
19+
# caught by the below test. In general this would be any platform
20+
# using UTF-16 as its encoding of unicode strings, such as
21+
# Jython. This is because UTF-16 itself is based on the use of such
22+
# surrogates, and there is no mechanism to further escape such
23+
# escapes.
24+
try:
25+
_x = eval('"\\uD800"')
26+
if not isinstance(_x, text_type):
27+
# Retry with an explicit u"" prefix when the bare literal did not produce text; see http://bugs.jython.org/issue2039
28+
_x = eval('u"\\uD800"')
29+
assert isinstance(_x, text_type)
30+
except:
31+
supports_lone_surrogates = False
32+
else:
33+
supports_lone_surrogates = True
2534

2635

2736
class MethodDispatcher(dict):

0 commit comments

Comments
 (0)