Skip to content

Commit 0081e81

Browse files
committed
Add expected failures for the tokenizer (except lone surrogates!).
We can't currently add the lone-surrogates tests because the expected-failures file is UTF-8 and we can't have lone surrogates there. Le sigh…
1 parent c66d41e commit 0081e81

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#data
2+
<!DOCTYPE>
3+
4+
#data
5+
<!DOCTYPE >
6+
7+
#data
8+
<!DOCTYPE
9+
10+
#data
11+
<!DOCTYPE
12+
13+
14+
#data
15+
<!DOCTYPE
16+
17+
#data
18+
<!DOCTYPE
19+
20+
#data
21+
<!DOCTYPE
22+
23+
24+
#data
25+
<!DOCTYPE
26+
27+
#data
28+
<!DOCTYPE
29+
30+
#data
31+
<!DOCTYPE
32+
33+
#data
34+
<!DOCTYPE
35+
36+
#data
37+
<!DOCTYPE
38+
39+
#data
40+
I'm &no
41+
42+
#data
43+
<!DOCTYPE

html5lib/tests/test_tokenizer.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
import json
44
import warnings
55
import re
6+
import os
67

7-
from .support import get_data_files
8+
from .support import get_data_files, TestData, xfail
89

910
from html5lib.tokenizer import HTMLTokenizer
1011
from html5lib import constants
@@ -161,6 +162,11 @@ def runTokenizerTest(test):
161162
assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
162163

163164

165+
@xfail
166+
def xfailRunTokenizerTest(*args, **kwargs):
167+
return runTokenizerTest(*args, **kwargs)
168+
169+
164170
def _doCapitalize(match):
165171
return match.group(1).upper()
166172

@@ -174,6 +180,14 @@ def capitalize(s):
174180

175181

176182
def testTokenizer():
183+
# Get xfails
184+
filename = os.path.join(os.path.split(__file__)[0],
185+
"expected-failures",
186+
"tokenizer.dat")
187+
xfails = TestData(filename, "data")
188+
xfails = frozenset([x["data"] for x in xfails])
189+
190+
# Get tests
177191
for filename in get_data_files('tokenizer', '*.test'):
178192
with open(filename) as fp:
179193
tests = json.load(fp)
@@ -185,4 +199,8 @@ def testTokenizer():
185199
test = unescape(test)
186200
for initialState in test["initialStates"]:
187201
test["initialState"] = capitalize(initialState)
188-
yield runTokenizerTest, test
202+
if test['input'] in xfails:
203+
testFunc = xfailRunTokenizerTest
204+
else:
205+
testFunc = runTokenizerTest
206+
yield testFunc, test

0 commit comments

Comments
 (0)