|
7 | 7 | except ImportError:
|
8 | 8 | import simplejson as json
|
9 | 9 |
|
10 |
| -from support import html5lib_test_files |
11 | 10 | from html5lib import html5parser, sanitizer, constants
|
12 | 11 |
|
13 |
| -class SanitizeTest(unittest.TestCase): |
14 |
| - def addTest(cls, name, expected, input): |
15 |
| - def test(self, expected=expected, input=input): |
16 |
| - expected = ''.join([token.toxml() for token in html5parser.HTMLParser(). |
17 |
| - parseFragment(expected).childNodes]) |
18 |
| - expected = json.loads(json.dumps(expected)) |
19 |
| - self.assertEqual(expected, self.sanitize_html(input)) |
20 |
| - setattr(cls, name, test) |
21 |
| - addTest = classmethod(addTest) |
| 12 | +def runSanitizerTest(name, expected, input): |
| 13 | + expected = ''.join([token.toxml() for token in html5parser.HTMLParser(). |
| 14 | + parseFragment(expected).childNodes]) |
| 15 | + expected = json.loads(json.dumps(expected)) |
| 16 | + assert expected == sanitize_html(input) |
22 | 17 |
|
23 |
| - def sanitize_html(self,stream): |
| 18 | +def sanitize_html(stream): |
24 | 19 | return ''.join([token.toxml() for token in
|
25 |
| - html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer). |
26 |
| - parseFragment(stream).childNodes]) |
27 |
| - |
28 |
| - def test_should_handle_astral_plane_characters(self): |
29 |
| - self.assertEqual(u"<p>\U0001d4b5 \U0001d538</p>", |
30 |
| - self.sanitize_html("<p>𝒵 𝔸</p>")) |
31 |
| - |
32 |
| -for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
33 |
| - if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']: continue ### TODO |
34 |
| - if tag_name != tag_name.lower(): continue ### TODO |
35 |
| - if tag_name == 'image': |
36 |
| - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
37 |
| - "<img title=\"1\"/>foo <bad>bar</bad> baz", |
38 |
| - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
39 |
| - elif tag_name == 'br': |
40 |
| - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
41 |
| - "<br title=\"1\"/>foo <bad>bar</bad> baz<br/>", |
| 20 | + html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer). |
| 21 | + parseFragment(stream).childNodes]) |
| 22 | + |
| 23 | +def test_should_handle_astral_plane_characters(): |
| 24 | + assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>𝒵 𝔸</p>") |
| 25 | + |
| 26 | +def test_sanitizer(): |
| 27 | + for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
| 28 | + if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']: |
| 29 | + continue ### TODO |
| 30 | + if tag_name != tag_name.lower(): |
| 31 | + continue ### TODO |
| 32 | + if tag_name == 'image': |
| 33 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 34 | + "<img title=\"1\"/>foo <bad>bar</bad> baz", |
| 35 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 36 | + elif tag_name == 'br': |
| 37 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 38 | + "<br title=\"1\"/>foo <bad>bar</bad> baz<br/>", |
| 39 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 40 | + elif tag_name in constants.voidElements: |
| 41 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 42 | + "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, |
| 43 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 44 | + else: |
| 45 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 46 | + "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
| 47 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 48 | + |
| 49 | + for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
| 50 | + tag_name = tag_name.upper() |
| 51 | + yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name, |
| 52 | + "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
42 | 53 | "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
|
43 |
| - elif tag_name in constants.voidElements: |
44 |
| - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
45 |
| - "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, |
46 |
| - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
47 |
| - else: |
48 |
| - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
49 |
| - "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
50 |
| - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
51 |
| - |
52 |
| -for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
53 |
| - tag_name = tag_name.upper() |
54 |
| - SanitizeTest.addTest("test_should_forbid_%s_tag" % tag_name, |
55 |
| - "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
56 |
| - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
57 |
| - |
58 |
| -for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
59 |
| - if attribute_name != attribute_name.lower(): continue ### TODO |
60 |
| - if attribute_name == 'style': continue |
61 |
| - SanitizeTest.addTest("test_should_allow_%s_attribute" % attribute_name, |
62 |
| - "<p %s=\"foo\">foo <bad>bar</bad> baz</p>" % attribute_name, |
63 |
| - "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name) |
64 |
| - |
65 |
| -for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
66 |
| - attribute_name = attribute_name.upper() |
67 |
| - SanitizeTest.addTest("test_should_forbid_%s_attribute" % attribute_name, |
68 |
| - "<p>foo <bad>bar</bad> baz</p>", |
69 |
| - "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name) |
70 |
| - |
71 |
| -for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
72 |
| - SanitizeTest.addTest("test_should_allow_%s_uris" % protocol, |
73 |
| - "<a href=\"%s\">foo</a>" % protocol, |
74 |
| - """<a href="%s">foo</a>""" % protocol) |
75 |
| - |
76 |
| -for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
77 |
| - SanitizeTest.addTest("test_should_allow_uppercase_%s_uris" % protocol, |
78 |
| - "<a href=\"%s\">foo</a>" % protocol, |
79 |
| - """<a href="%s">foo</a>""" % protocol) |
80 |
| - |
81 |
| -def buildTestSuite(): |
82 |
| - for filename in html5lib_test_files("sanitizer"): |
83 |
| - for test in json.load(file(filename)): |
84 |
| - SanitizeTest.addTest('test_' + test['name'], test['output'], test['input']) |
85 |
| - |
86 |
| - return unittest.TestLoader().loadTestsFromTestCase(SanitizeTest) |
87 |
| - |
88 |
| -def sanitize_html(stream): |
89 |
| - return ''.join([token.toxml() for token in |
90 |
| - html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer). |
91 |
| - parseFragment(stream).childNodes]) |
92 |
| - |
93 |
| -def main(): |
94 |
| - buildTestSuite() |
95 |
| - unittest.main() |
96 | 54 |
|
97 |
| -if __name__ == "__main__": |
98 |
| - main() |
| 55 | + for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
| 56 | + if attribute_name != attribute_name.lower(): continue ### TODO |
| 57 | + if attribute_name == 'style': continue |
| 58 | + yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name, |
| 59 | + "<p %s=\"foo\">foo <bad>bar</bad> baz</p>" % attribute_name, |
| 60 | + "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name) |
| 61 | + |
| 62 | + for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
| 63 | + attribute_name = attribute_name.upper() |
| 64 | + yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name, |
| 65 | + "<p>foo <bad>bar</bad> baz</p>", |
| 66 | + "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name) |
| 67 | + |
| 68 | + for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
| 69 | + yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol, |
| 70 | + "<a href=\"%s\">foo</a>" % protocol, |
| 71 | + """<a href="%s">foo</a>""" % protocol) |
| 72 | + |
| 73 | + for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
| 74 | + yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol, |
| 75 | + "<a href=\"%s\">foo</a>" % protocol, |
| 76 | + """<a href="%s">foo</a>""" % protocol) |
0 commit comments