Skip to content

Commit e5d395c

Browse files
committed
Silence wrong-import-position
1 parent a017b88 commit e5d395c

File tree

6 files changed

+60
-58
lines changed

6 files changed

+60
-58
lines changed

html5lib/serializer/htmlserializer.py

Lines changed: 49 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
import re
55

6+
from codecs import register_error, xmlcharrefreplace_errors
7+
68
from ..constants import voidElements, booleanAttributes, spaceCharacters
79
from ..constants import rcdataElements, entities, xmlEntities
810
from .. import utils
@@ -21,61 +23,54 @@
2123
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
2224
"\u3000]")
2325

24-
try:
25-
from codecs import register_error, xmlcharrefreplace_errors
26-
except ImportError:
27-
unicode_encode_errors = "strict"
28-
else:
29-
unicode_encode_errors = "htmlentityreplace"
30-
31-
encode_entity_map = {}
32-
is_ucs4 = len("\U0010FFFF") == 1
33-
for k, v in list(entities.items()):
34-
# skip multi-character entities
35-
if ((is_ucs4 and len(v) > 1) or
36-
(not is_ucs4 and len(v) > 2)):
37-
continue
38-
if v != "&":
39-
if len(v) == 2:
40-
v = utils.surrogatePairToCodepoint(v)
41-
else:
42-
v = ord(v)
43-
if v not in encode_entity_map or k.islower():
44-
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
45-
encode_entity_map[v] = k
46-
47-
def htmlentityreplace_errors(exc):
48-
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
49-
res = []
50-
codepoints = []
51-
skip = False
52-
for i, c in enumerate(exc.object[exc.start:exc.end]):
53-
if skip:
54-
skip = False
55-
continue
56-
index = i + exc.start
57-
if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
58-
codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
59-
skip = True
60-
else:
61-
codepoint = ord(c)
62-
codepoints.append(codepoint)
63-
for cp in codepoints:
64-
e = encode_entity_map.get(cp)
65-
if e:
66-
res.append("&")
67-
res.append(e)
68-
if not e.endswith(";"):
69-
res.append(";")
70-
else:
71-
res.append("&#x%s;" % (hex(cp)[2:]))
72-
return ("".join(res), exc.end)
73-
else:
74-
return xmlcharrefreplace_errors(exc)
7526

76-
register_error(unicode_encode_errors, htmlentityreplace_errors)
27+
encode_entity_map = {}
28+
is_ucs4 = len("\U0010FFFF") == 1
29+
for k, v in list(entities.items()):
30+
# skip multi-character entities
31+
if ((is_ucs4 and len(v) > 1) or
32+
(not is_ucs4 and len(v) > 2)):
33+
continue
34+
if v != "&":
35+
if len(v) == 2:
36+
v = utils.surrogatePairToCodepoint(v)
37+
else:
38+
v = ord(v)
39+
if v not in encode_entity_map or k.islower():
40+
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
41+
encode_entity_map[v] = k
42+
43+
44+
def htmlentityreplace_errors(exc):
45+
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
46+
res = []
47+
codepoints = []
48+
skip = False
49+
for i, c in enumerate(exc.object[exc.start:exc.end]):
50+
if skip:
51+
skip = False
52+
continue
53+
index = i + exc.start
54+
if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
55+
codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
56+
skip = True
57+
else:
58+
codepoint = ord(c)
59+
codepoints.append(codepoint)
60+
for cp in codepoints:
61+
e = encode_entity_map.get(cp)
62+
if e:
63+
res.append("&")
64+
res.append(e)
65+
if not e.endswith(";"):
66+
res.append(";")
67+
else:
68+
res.append("&#x%s;" % (hex(cp)[2:]))
69+
return ("".join(res), exc.end)
70+
else:
71+
return xmlcharrefreplace_errors(exc)
7772

78-
del register_error
73+
register_error("htmlentityreplace", htmlentityreplace_errors)
7974

8075

8176
class HTMLSerializer(object):
@@ -168,7 +163,7 @@ def __init__(self, **kwargs):
168163
def encode(self, string):
169164
assert(isinstance(string, text_type))
170165
if self.encoding:
171-
return string.encode(self.encoding, unicode_encode_errors)
166+
return string.encode(self.encoding, "htmlentityreplace")
172167
else:
173168
return string
174169

html5lib/tests/support.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3+
# pylint:disable=wrong-import-position
4+
35
import os
46
import sys
57
import codecs
@@ -68,6 +70,8 @@
6870
"walker": treewalkers.getTreeWalker("genshi")
6971
}
7072

73+
# pylint:enable=wrong-import-position
74+
7175

7276
def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
7377
return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))

html5lib/tests/test_encoding.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def test_encoding():
5555
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
5656
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
5757

58+
# pylint:disable=wrong-import-position
5859
try:
5960
try:
6061
import charade # noqa
@@ -67,3 +68,4 @@ def test_chardet():
6768
with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
6869
encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
6970
assert encoding[0].name == "big5"
71+
# pylint:enable=wrong-import-position

html5lib/tests/test_serializer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@
1212
from html5lib.serializer import HTMLSerializer, serialize
1313
from html5lib.treewalkers._base import TreeWalker
1414

15+
# pylint:disable=wrong-import-position
1516
optionals_loaded = []
1617

1718
try:
1819
from lxml import etree
1920
optionals_loaded.append("lxml")
2021
except ImportError:
2122
pass
23+
# pylint:enable=wrong-import-position
2224

2325
default_namespace = constants.namespaces["html"]
2426

html5lib/tokenizer.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
try:
4-
chr = unichr # noqa
5-
except NameError:
6-
pass
3+
from six import unichr as chr
74

85
from collections import deque
96

html5lib/trie/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44

55
Trie = PyTrie
66

7+
# pylint:disable=wrong-import-position
78
try:
89
from .datrie import Trie as DATrie
910
except ImportError:
1011
pass
1112
else:
1213
Trie = DATrie
14+
# pylint:enable=wrong-import-position

0 commit comments

Comments
 (0)