Skip to content

Commit 2816de7

Browse files
committed
Get encoding tests running again under nose and Py3
1 parent a32418c commit 2816de7

File tree

2 files changed

+32
-39
lines changed

2 files changed

+32
-39
lines changed

html5lib/tests/support.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,12 @@ def __getitem__(self, key):
7474
return dict.get(self, key, self.default)
7575

7676
class TestData(object):
77-
def __init__(self, filename, newTestHeading="data"):
78-
self.f = codecs.open(filename, encoding="utf8")
77+
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
78+
if encoding == None:
79+
self.f = open(filename, mode="rb")
80+
else:
81+
self.f = codecs.open(filename, encoding=encoding)
82+
self.encoding = encoding
7983
self.newTestHeading = newTestHeading
8084

8185
def __del__(self):
@@ -93,7 +97,7 @@ def __iter__(self):
9397
yield self.normaliseOutput(data)
9498
data = DefaultDict(None)
9599
key = heading
96-
data[key]=""
100+
data[key]="" if self.encoding else b""
97101
elif key is not None:
98102
data[key] += line
99103
if data:
@@ -102,15 +106,16 @@ def __iter__(self):
102106
def isSectionHeading(self, line):
103107
"""If the current heading is a test section heading return the heading,
104108
otherwise return False"""
105-
if line.startswith("#"):
109+
#print(line)
110+
if line.startswith("#" if self.encoding else b"#"):
106111
return line[1:].strip()
107112
else:
108113
return False
109114

110115
def normaliseOutput(self, data):
111116
#Remove trailing newlines
112117
for key,value in data.items():
113-
if value.endswith("\n"):
118+
if value.endswith("\n" if self.encoding else b"\n"):
114119
data[key] = value[:-1]
115120
return data
116121

html5lib/tests/test_encoding.py

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -23,42 +23,30 @@ def test_codec_name_c(self):
2323
def test_codec_name_d(self):
2424
self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
2525

26-
def buildTestSuite():
26+
def runEncodingTest(data, encoding):
    """Parse ``data`` and assert the parser settled on ``encoding``.

    data     -- the test input handed to ``HTMLParser.parse`` (the caller
                reads it from a test file opened in binary mode).
    encoding -- the expected encoding name, as ASCII bytes or as text; it is
                compared case-insensitively.

    Raises AssertionError, with the input and both encoding names in the
    message, when the first element of the tokenizer stream's
    ``charEncoding`` differs from the expected name.
    """
    p = HTMLParser()
    # The parse result is not needed; parsing is run so that the stream's
    # charEncoding is populated. chardet is disabled for the parse.
    p.parse(data, useChardet=False)

    encoding = encoding.lower()
    # Expected names read from a binary test file are bytes; decode them so
    # they compare equal to the text name. Text input is used as-is.
    if isinstance(encoding, bytes):
        encoding = encoding.decode("ascii")

    detected = p.tokenizer.stream.charEncoding[0]
    errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
                    (data, repr(encoding), repr(detected)))
    assert encoding == detected, errorMessage
35+
36+
def test_encoding():
2737
for filename in get_data_files("encoding"):
2838
test_name = os.path.basename(filename).replace('.dat',''). \
2939
replace('-','')
30-
tests = TestData(filename, "data")
40+
tests = TestData(filename, b"data", encoding=None)
3141
for idx, test in enumerate(tests):
32-
def encodingTest(self, data=test['data'],
33-
encoding=test['encoding']):
34-
p = HTMLParser()
35-
t = p.parse(data, useChardet=False)
36-
37-
errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"%
38-
(data, repr(encoding.lower()),
39-
repr(p.tokenizer.stream.charEncoding)))
40-
self.assertEquals(encoding.lower(),
41-
p.tokenizer.stream.charEncoding[0],
42-
errorMessage)
43-
setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx+1),
44-
encodingTest)
45-
46-
try:
47-
import chardet
48-
def test_chardet(self):
49-
data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read()
50-
encoding = inputstream.HTMLInputStream(data).charEncoding
51-
assert encoding[0].lower() == "big5"
52-
setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
53-
except ImportError:
54-
print("chardet not found, skipping chardet tests")
55-
42+
yield (runEncodingTest, test[b'data'], test[b'encoding'])
5643

57-
return unittest.defaultTestLoader.loadTestsFromName(__name__)
58-
59-
def main():
60-
buildTestSuite()
61-
unittest.main()
62-
63-
if __name__ == "__main__":
64-
main()
44+
try:
    # Imported only to find out whether chardet is installed; the name itself
    # is not referenced below.
    import chardet

    def test_chardet(self):
        """Check that the Big5 sample file is reported as ``big5``."""
        path = os.path.join(test_dir, "encoding", "chardet", "test_big5.txt")
        # Read raw bytes and close the file promptly. In text mode Python 3
        # would decode the sample with the locale's default encoding before
        # HTMLInputStream ever saw it.
        with open(path, "rb") as f:
            data = f.read()
        encoding = inputstream.HTMLInputStream(data).charEncoding
        assert encoding[0].lower() == "big5"

    # Attached dynamically so the test only exists when chardet is available.
    setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
except ImportError:
    print("chardet not found, skipping chardet tests")

0 commit comments

Comments
 (0)