Skip to content

Commit f50cb08

Browse files
committed
Unbreak sanitizer tests and convert to nose
1 parent 99f4bf1 commit f50cb08

File tree

2 files changed

+63
-85
lines changed

2 files changed

+63
-85
lines changed

html5lib/sanitizer.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -245,11 +245,11 @@ def sanitize_css(self, style):
245245

246246
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
247247
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
248-
lowercaseElementName=False, lowercaseAttrName=False):
248+
lowercaseElementName=False, lowercaseAttrName=False, parser=None):
249249
#Change case matching defaults as we only output lowercase html anyway
250250
#This solution doesn't seem ideal...
251251
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
252-
lowercaseElementName, lowercaseAttrName)
252+
lowercaseElementName, lowercaseAttrName, parser=parser)
253253

254254
def __iter__(self):
255255
for token in HTMLTokenizer.__iter__(self):

html5lib/tests/test_sanitizer.py

Lines changed: 61 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -7,92 +7,70 @@
77
except ImportError:
88
import simplejson as json
99

10-
from support import html5lib_test_files
1110
from html5lib import html5parser, sanitizer, constants
1211

13-
class SanitizeTest(unittest.TestCase):
14-
def addTest(cls, name, expected, input):
15-
def test(self, expected=expected, input=input):
16-
expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
17-
parseFragment(expected).childNodes])
18-
expected = json.loads(json.dumps(expected))
19-
self.assertEqual(expected, self.sanitize_html(input))
20-
setattr(cls, name, test)
21-
addTest = classmethod(addTest)
12+
def runSanitizerTest(name, expected, input):
13+
expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
14+
parseFragment(expected).childNodes])
15+
expected = json.loads(json.dumps(expected))
16+
assert expected == sanitize_html(input)
2217

23-
def sanitize_html(self,stream):
18+
def sanitize_html(stream):
2419
return ''.join([token.toxml() for token in
25-
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
26-
parseFragment(stream).childNodes])
27-
28-
def test_should_handle_astral_plane_characters(self):
29-
self.assertEqual(u"<p>\U0001d4b5 \U0001d538</p>",
30-
self.sanitize_html("<p>&#x1d4b5; &#x1d538;</p>"))
31-
32-
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
33-
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']: continue ### TODO
34-
if tag_name != tag_name.lower(): continue ### TODO
35-
if tag_name == 'image':
36-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
37-
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
38-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
39-
elif tag_name == 'br':
40-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
41-
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
20+
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
21+
parseFragment(stream).childNodes])
22+
23+
def test_should_handle_astral_plane_characters():
24+
assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
25+
26+
def test_sanitizer():
27+
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
28+
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
29+
continue ### TODO
30+
if tag_name != tag_name.lower():
31+
continue ### TODO
32+
if tag_name == 'image':
33+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
34+
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
35+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
36+
elif tag_name == 'br':
37+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
38+
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
39+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
40+
elif tag_name in constants.voidElements:
41+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
42+
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
43+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
44+
else:
45+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
46+
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
47+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
48+
49+
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
50+
tag_name = tag_name.upper()
51+
yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
52+
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
4253
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
43-
elif tag_name in constants.voidElements:
44-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
45-
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
46-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
47-
else:
48-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
49-
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
50-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
51-
52-
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
53-
tag_name = tag_name.upper()
54-
SanitizeTest.addTest("test_should_forbid_%s_tag" % tag_name,
55-
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
56-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
57-
58-
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
59-
if attribute_name != attribute_name.lower(): continue ### TODO
60-
if attribute_name == 'style': continue
61-
SanitizeTest.addTest("test_should_allow_%s_attribute" % attribute_name,
62-
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
63-
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
64-
65-
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
66-
attribute_name = attribute_name.upper()
67-
SanitizeTest.addTest("test_should_forbid_%s_attribute" % attribute_name,
68-
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
69-
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
70-
71-
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
72-
SanitizeTest.addTest("test_should_allow_%s_uris" % protocol,
73-
"<a href=\"%s\">foo</a>" % protocol,
74-
"""<a href="%s">foo</a>""" % protocol)
75-
76-
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
77-
SanitizeTest.addTest("test_should_allow_uppercase_%s_uris" % protocol,
78-
"<a href=\"%s\">foo</a>" % protocol,
79-
"""<a href="%s">foo</a>""" % protocol)
80-
81-
def buildTestSuite():
82-
for filename in html5lib_test_files("sanitizer"):
83-
for test in json.load(file(filename)):
84-
SanitizeTest.addTest('test_' + test['name'], test['output'], test['input'])
85-
86-
return unittest.TestLoader().loadTestsFromTestCase(SanitizeTest)
87-
88-
def sanitize_html(stream):
89-
return ''.join([token.toxml() for token in
90-
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
91-
parseFragment(stream).childNodes])
92-
93-
def main():
94-
buildTestSuite()
95-
unittest.main()
9654

97-
if __name__ == "__main__":
98-
main()
55+
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
56+
if attribute_name != attribute_name.lower(): continue ### TODO
57+
if attribute_name == 'style': continue
58+
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
59+
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
60+
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
61+
62+
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
63+
attribute_name = attribute_name.upper()
64+
yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
65+
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
66+
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
67+
68+
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
69+
yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
70+
"<a href=\"%s\">foo</a>" % protocol,
71+
"""<a href="%s">foo</a>""" % protocol)
72+
73+
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
74+
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
75+
"<a href=\"%s\">foo</a>" % protocol,
76+
"""<a href="%s">foo</a>""" % protocol)

0 commit comments

Comments
 (0)