
Commit d81e892: Rebuild Py3
1 parent 315d482

9 files changed (+31, -39 lines)

html5lib/inputstream.py (3 additions, 5 deletions)

@@ -193,9 +193,9 @@ def openStream(self, source):
         else:
             # Otherwise treat source as a string and convert to a file object
             if isinstance(source, str):
-                # This can error (on invalid characters, thus the need for the argument)
-                source = source.encode('utf-32', errors="replace")
-                self.charEncoding = ("utf-32", "certain")
+                # XXX: we should handle lone surrogates here
+                source = source.encode('utf-8', errors="replace")
+                self.charEncoding = ("utf-8", "certain")
             try:
                 from io import BytesIO
             except:
@@ -783,9 +783,7 @@ def codecName(encoding):
     """Return the python codec name corresponding to an encoding or None if the
     string doesn't correspond to a valid encoding."""
     if encoding:
-        print(encoding)
         canonicalName = ascii_punctuation_re.sub("", encoding).lower()
-        print(canonicalName)
        return encodings.get(canonicalName, None)
     else:
         return None
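
A minimal sketch, not part of the commit, of what the new str-handling branch does: the text source is encoded to UTF-8 bytes so the rest of openStream can treat it as a byte stream, and errors="replace" keeps the encode from raising on characters UTF-8 cannot represent, such as the lone surrogates the XXX comment refers to. The sample string is illustrative.

    from io import BytesIO

    source = "caf\u00e9 plus a lone surrogate: \ud800"
    data = source.encode("utf-8", errors="replace")  # lone surrogate becomes b"?"
    stream = BytesIO(data)                           # downstream code reads bytes again
    print(data)                                      # b'caf\xc3\xa9 plus a lone surrogate: ?'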

html5lib/sanitizer.py (3 additions, 3 deletions)

@@ -49,8 +49,8 @@ class HTMLSanitizerMixin(object):
         'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
         'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
         'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
-        'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
-        'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
+        'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
+        'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
         'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
         'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
         'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
@@ -97,7 +97,7 @@ class HTMLSanitizerMixin(object):
         'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
         'y1', 'y2', 'zoomAndPan']
 
-    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc',
+    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
         'xlink:href', 'xml:base']
 
     svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
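
With 'poster' added to attr_val_is_uri, its value should go through the same URI checks as src and href. A hedged usage sketch of how the sanitizer mixin was commonly wired up in html5lib of this vintage; the markup, the disallowed scheme, and the exact parse call are illustrative assumptions rather than something shown in the commit:

    import html5lib
    from html5lib import sanitizer

    parser = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
    # A poster URL with a disallowed scheme should be stripped during sanitization.
    fragment = parser.parseFragment(
        '<video poster="javascript:alert(1)" preload="auto"></video>')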

html5lib/tests/support.py (1 addition, 2 deletions)

@@ -62,9 +62,8 @@
 except ImportError:
     pass
 
-def html5lib_test_files(subdirectory, files='*.dat'):
+def get_data_files(subdirectory, files='*.dat'):
     return glob.glob(os.path.join(test_dir,subdirectory,files))
-html5lib_test_files.__test__ = False
 
 class DefaultDict(dict):
     def __init__(self, default, *args, **kwargs):
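
The old helper had "test" in its name, so it needed the html5lib_test_files.__test__ = False marker to keep nose from collecting it as a test; renaming it to get_data_files presumably makes the marker unnecessary, which is why the commit drops that line. Call sites across the rest of the commit switch accordingly, roughly like this (directory and pattern are illustrative, following the call sites in this commit):

    from html5lib.tests.support import get_data_files

    for path in get_data_files('tree-construction'):   # pattern defaults to '*.dat'
        print(path)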

html5lib/tests/test_encoding.py (2 additions, 2 deletions)

@@ -7,7 +7,7 @@
 except AttributeError:
     unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
 
-from .support import html5lib_test_files, TestData, test_dir
+from .support import get_data_files, TestData, test_dir
 from html5lib import HTMLParser, inputstream
 
 class Html5EncodingTestCase(unittest.TestCase):
@@ -24,7 +24,7 @@ def test_codec_name_d(self):
         self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
 
 def buildTestSuite():
-    for filename in html5lib_test_files("encoding"):
+    for filename in get_data_files("encoding"):
         test_name = os.path.basename(filename).replace('.dat',''). \
             replace('-','')
         tests = TestData(filename, "data")

html5lib/tests/test_parser.py (2 additions, 2 deletions)

@@ -7,7 +7,7 @@
 
 warnings.simplefilter("error")
 
-from .support import html5lib_test_files as data_files
+from .support import get_data_files
 from .support import TestData, convert, convertExpected, treeTypes
 import html5lib
 from html5lib import html5parser, treebuilders, constants
@@ -67,7 +67,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
 
 def test_parser():
     sys.stderr.write('Testing tree builders '+ " ".join(list(treeTypes.keys())) + "\n")
-    files = data_files('tree-construction')
+    files = get_data_files('tree-construction')
 
     for filename in files:
         testName = os.path.basename(filename).replace(".dat","")

html5lib/tests/test_serializer.py (10 additions, 9 deletions)

@@ -1,6 +1,6 @@
 import os
 import unittest
-from .support import html5lib_test_files
+from .support import get_data_files
 
 try:
     import json
@@ -183,11 +183,12 @@ def testEntityNoResolve(self):
         self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
 
 def test_serializer():
-    for filename in html5lib_test_files('serializer', '*.test'):
-        tests = json.load(open(filename))
-        test_name = os.path.basename(filename).replace('.test','')
-        for index, test in enumerate(tests['tests']):
-            xhtml = test.get("xhtml", test["expected"])
-            if test_name == 'optionaltags':
-                xhtml = None
-            yield runSerializerTest, test["input"], test["expected"], xhtml, test.get("options", {})
+    for filename in get_data_files('serializer', '*.test'):
+        with open(filename) as fp:
+            tests = json.load(fp)
+        test_name = os.path.basename(filename).replace('.test','')
+        for index, test in enumerate(tests['tests']):
+            xhtml = test.get("xhtml", test["expected"])
+            if test_name == 'optionaltags':
+                xhtml = None
+            yield runSerializerTest, test["input"], test["expected"], xhtml, test.get("options", {})
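
Besides the helper rename, the rewritten loop loads the JSON inside a with block so the file object is closed deterministically. A small sketch of that pattern (function name illustrative): on Python 3 an unclosed file can emit a ResourceWarning, which is worth avoiding in a suite where other modules in this commit escalate warnings to errors via warnings.simplefilter("error").

    import json

    def load_test_file(filename):
        # The context manager closes the file even if json.load raises.
        with open(filename) as fp:
            return json.load(fp)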

html5lib/tests/test_tokenizer.py (7 additions, 13 deletions)

@@ -2,7 +2,6 @@
 
 import sys
 import os
-import unittest
 import io
 import warnings
 import re
@@ -12,7 +11,7 @@
 except ImportError:
     import simplejson as json
 
-from .support import html5lib_test_files
+from .support import get_data_files
 from html5lib.tokenizer import HTMLTokenizer
 from html5lib import constants
 
@@ -124,7 +123,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
             tokens[tokenType][1].append(token)
     return tokens["expected"] == tokens["received"]
 
-def unescape_test(test):
+def unescape(test):
     def decode(inp):
         return inp.encode("utf-8").decode("unicode-escape")
 
@@ -139,14 +138,12 @@ def decode(inp):
                 del token[2][key]
                 token[2][decode(key)] = decode(value)
     return test
-unescape_test.__test__ = False
-
 
 def runTokenizerTest(test):
     #XXX - move this out into the setup function
     #concatenate all consecutive character tokens into a single token
     if 'doubleEscaped' in test:
-        test = unescape_test(test)
+        test = unescape(test)
 
     expected = concatenateCharacterTokens(test['output'])
     if 'lastStartTag' not in test:
@@ -166,8 +163,7 @@ def runTokenizerTest(test):
                            "\nreceived:", str(tokens)])
     errorMsg = errorMsg
     ignoreErrorOrder = test.get('ignoreErrorOrder', False)
-    assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg
-
+    assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
 
 def _doCapitalize(match):
     return match.group(1).upper()
@@ -179,19 +175,17 @@ def capitalize(s):
     s = _capitalizeRe(_doCapitalize, s)
     return s
 
-
-def test_tokenizer():
-    for filename in html5lib_test_files('tokenizer', '*.test'):
+def testTokenizer():
+    for filename in get_data_files('tokenizer', '*.test'):
         with open(filename) as fp:
             tests = json.load(fp)
             testName = os.path.basename(filename).replace(".test","")
             if 'tests' in tests:
                 for index,test in enumerate(tests['tests']):
-                    #Skip tests with a self closing flag
+                    #Skip tests with a self closing flag
                     skip = False
                     if 'initialStates' not in test:
                         test["initialStates"] = ["Data state"]
                     for initialState in test["initialStates"]:
                         test["initialState"] = capitalize(initialState)
                         yield runTokenizerTest, test
-

html5lib/tests/test_treewalkers.py (2 additions, 2 deletions)

@@ -10,7 +10,7 @@
 
 warnings.simplefilter("error")
 
-from .support import html5lib_test_files, TestData, convertExpected
+from .support import get_data_files, TestData, convertExpected
 
 from html5lib import html5parser, treewalkers, treebuilders, constants
 from html5lib.filters.lint import Filter as LintFilter, LintError
@@ -298,7 +298,7 @@ def test_treewalker():
     sys.stdout.write('Testing tree walkers '+ " ".join(list(treeTypes.keys())) + "\n")
 
     for treeName, treeCls in treeTypes.items():
-        files = html5lib_test_files('tree-construction')
+        files = get_data_files('tree-construction')
         for filename in files:
            testName = os.path.basename(filename).replace(".dat","")
 

html5lib/tests/tokenizertotree.py (1 addition, 1 deletion)

@@ -17,7 +17,7 @@ def main(out_path):
         sys.stderr.write("Path %s does not exist"%out_path)
         sys.exit(1)
 
-    for filename in support.html5lib_test_files('tokenizer', '*.test'):
+    for filename in support.get_data_files('tokenizer', '*.test'):
         run_file(filename, out_path)
 
 def run_file(filename, out_path):
