Skip to content

Commit 31d2b07

Browse files
committed
Added whitespace stripper filter tests.
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40915
1 parent 7568d31 commit 31d2b07

File tree

2 files changed

+124
-106
lines changed

2 files changed

+124
-106
lines changed

src/html5lib/filters/whitespace.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
from html5lib.constants import rcdataElements, spaceCharacters
1111
spaceCharacters = u"".join(spaceCharacters)
1212

13+
SPACES_REGEX = re.compile(u"[%s]+" % spaceCharacters)
14+
1315
class Filter(_base.Filter):
14-
16+
1517
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
16-
18+
1719
def __iter__(self):
1820
preserve = 0
1921
for token in _base.Filter.__iter__(self):
@@ -35,5 +37,5 @@ def __iter__(self):
3537
yield token
3638

3739
def collapse_spaces(text):
38-
return re.compile(u"[%s]+" % spaceCharacters).sub(' ', text)
40+
return SPACES_REGEX.sub(' ', text)
3941

tests/test_whitespace_filter.py

Lines changed: 119 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1,103 +1,119 @@
1-
import unittest
2-
3-
from html5lib.filters.whitespace import Filter
4-
from html5lib.constants import spaceCharacters
5-
spaceCharacters = u"".join(spaceCharacters)
6-
7-
class TestCase(unittest.TestCase):
8-
def runTest(self, input, expected):
9-
output = list(Filter(input))
10-
errorMsg = "\n".join(["\n\nInput:", str(input),
11-
"\nExpected:", str(expected),
12-
"\nReceived:", str(output)])
13-
self.assertEquals(output, expected, errorMsg)
14-
15-
def runTestUnmodifiedOutput(self, input):
16-
self.runTest(input, input)
17-
18-
def testLeadingWhitespace(self):
19-
self.runTest(
20-
[{"type": u"StartTag", "name": u"p", "data": []},
21-
{"type": u"SpaceCharacters", "data": spaceCharacters},
22-
{"type": u"Characters", "data": u"foo"},
23-
{"type": u"EndTag", "name": u"p", "data": []}],
24-
[{"type": u"StartTag", "name": u"p", "data": []},
25-
{"type": u"SpaceCharacters", "data": u" "},
26-
{"type": u"Characters", "data": u"foo"},
27-
{"type": u"EndTag", "name": u"p", "data": []}])
28-
29-
def testLeadingWhitespaceAsCharacters(self):
30-
self.runTest(
31-
[{"type": u"StartTag", "name": u"p", "data": []},
32-
{"type": u"Characters", "data": spaceCharacters + u"foo"},
33-
{"type": u"EndTag", "name": u"p", "data": []}],
34-
[{"type": u"StartTag", "name": u"p", "data": []},
35-
{"type": u"Characters", "data": u" foo"},
36-
{"type": u"EndTag", "name": u"p", "data": []}])
37-
38-
def testTrailingWhitespace(self):
39-
self.runTest(
40-
[{"type": u"StartTag", "name": u"p", "data": []},
41-
{"type": u"Characters", "data": u"foo"},
42-
{"type": u"SpaceCharacters", "data": spaceCharacters},
43-
{"type": u"EndTag", "name": u"p", "data": []}],
44-
[{"type": u"StartTag", "name": u"p", "data": []},
45-
{"type": u"Characters", "data": u"foo"},
46-
{"type": u"SpaceCharacters", "data": u" "},
47-
{"type": u"EndTag", "name": u"p", "data": []}])
48-
49-
def testTrailingWhitespaceAsCharacters(self):
50-
self.runTest(
51-
[{"type": u"StartTag", "name": u"p", "data": []},
52-
{"type": u"Characters", "data": u"foo" + spaceCharacters},
53-
{"type": u"EndTag", "name": u"p", "data": []}],
54-
[{"type": u"StartTag", "name": u"p", "data": []},
55-
{"type": u"Characters", "data": u"foo "},
56-
{"type": u"EndTag", "name": u"p", "data": []}])
57-
58-
def testWhitespace(self):
59-
self.runTest(
60-
[{"type": u"StartTag", "name": u"p", "data": []},
61-
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
62-
{"type": u"EndTag", "name": u"p", "data": []}],
63-
[{"type": u"StartTag", "name": u"p", "data": []},
64-
{"type": u"Characters", "data": u"foo bar"},
65-
{"type": u"EndTag", "name": u"p", "data": []}])
66-
67-
def testLeadingWhitespaceInPre(self):
68-
self.runTestUnmodifiedOutput(
69-
[{"type": u"StartTag", "name": u"pre", "data": []},
70-
{"type": u"SpaceCharacters", "data": spaceCharacters},
71-
{"type": u"Characters", "data": u"foo"},
72-
{"type": u"EndTag", "name": u"pre", "data": []}])
73-
74-
def testLeadingWhitespaceAsCharactersInPre(self):
75-
self.runTestUnmodifiedOutput(
76-
[{"type": u"StartTag", "name": u"pre", "data": []},
77-
{"type": u"Characters", "data": spaceCharacters + u"foo"},
78-
{"type": u"EndTag", "name": u"pre", "data": []}])
79-
80-
def testTrailingWhitespaceInPre(self):
81-
self.runTestUnmodifiedOutput(
82-
[{"type": u"StartTag", "name": u"pre", "data": []},
83-
{"type": u"Characters", "data": u"foo"},
84-
{"type": u"SpaceCharacters", "data": spaceCharacters},
85-
{"type": u"EndTag", "name": u"pre", "data": []}])
86-
87-
def testTrailingWhitespaceAsCharactersInPre(self):
88-
self.runTestUnmodifiedOutput(
89-
[{"type": u"StartTag", "name": u"pre", "data": []},
90-
{"type": u"Characters", "data": u"foo" + spaceCharacters},
91-
{"type": u"EndTag", "name": u"pre", "data": []}])
92-
93-
def testWhitespaceInPre(self):
94-
self.runTestUnmodifiedOutput(
95-
[{"type": u"StartTag", "name": u"pre", "data": []},
96-
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
97-
{"type": u"EndTag", "name": u"pre", "data": []}])
98-
99-
def main():
100-
unittest.main()
101-
102-
if __name__ == "__main__":
103-
main()
1+
import unittest
2+
3+
from html5lib.filters.whitespace import Filter
4+
from html5lib.constants import spaceCharacters
5+
spaceCharacters = u"".join(spaceCharacters)
6+
7+
class TestCase(unittest.TestCase):
8+
def runTest(self, input, expected):
9+
output = list(Filter(input))
10+
errorMsg = "\n".join(["\n\nInput:", str(input),
11+
"\nExpected:", str(expected),
12+
"\nReceived:", str(output)])
13+
self.assertEquals(output, expected, errorMsg)
14+
15+
def runTestUnmodifiedOutput(self, input):
16+
self.runTest(input, input)
17+
18+
def testPhrasingElements(self):
19+
self.runTestUnmodifiedOutput(
20+
[{"type": u"Characters", "data": u"This is a " },
21+
{"type": u"StartTag", "name": u"span", "data": [] },
22+
{"type": u"Characters", "data": u"phrase" },
23+
{"type": u"EndTag", "name": u"span", "data": []},
24+
{"type": u"SpaceCharacters", "data": u" " },
25+
{"type": u"Characters", "data": u"with" },
26+
{"type": u"SpaceCharacters", "data": u" " },
27+
{"type": u"StartTag", "name": u"em", "data": [] },
28+
{"type": u"Characters", "data": u"emphasised text" },
29+
{"type": u"EndTag", "name": u"em", "data": []},
30+
{"type": u"Characters", "data": u" and an " },
31+
{"type": u"StartTag", "name": u"img", "data": [[u"alt", u"image"]] },
32+
{"type": u"Characters", "data": u"." }])
33+
34+
def testLeadingWhitespace(self):
35+
self.runTest(
36+
[{"type": u"StartTag", "name": u"p", "data": []},
37+
{"type": u"SpaceCharacters", "data": spaceCharacters},
38+
{"type": u"Characters", "data": u"foo"},
39+
{"type": u"EndTag", "name": u"p", "data": []}],
40+
[{"type": u"StartTag", "name": u"p", "data": []},
41+
{"type": u"SpaceCharacters", "data": u" "},
42+
{"type": u"Characters", "data": u"foo"},
43+
{"type": u"EndTag", "name": u"p", "data": []}])
44+
45+
def testLeadingWhitespaceAsCharacters(self):
46+
self.runTest(
47+
[{"type": u"StartTag", "name": u"p", "data": []},
48+
{"type": u"Characters", "data": spaceCharacters + u"foo"},
49+
{"type": u"EndTag", "name": u"p", "data": []}],
50+
[{"type": u"StartTag", "name": u"p", "data": []},
51+
{"type": u"Characters", "data": u" foo"},
52+
{"type": u"EndTag", "name": u"p", "data": []}])
53+
54+
def testTrailingWhitespace(self):
55+
self.runTest(
56+
[{"type": u"StartTag", "name": u"p", "data": []},
57+
{"type": u"Characters", "data": u"foo"},
58+
{"type": u"SpaceCharacters", "data": spaceCharacters},
59+
{"type": u"EndTag", "name": u"p", "data": []}],
60+
[{"type": u"StartTag", "name": u"p", "data": []},
61+
{"type": u"Characters", "data": u"foo"},
62+
{"type": u"SpaceCharacters", "data": u" "},
63+
{"type": u"EndTag", "name": u"p", "data": []}])
64+
65+
def testTrailingWhitespaceAsCharacters(self):
66+
self.runTest(
67+
[{"type": u"StartTag", "name": u"p", "data": []},
68+
{"type": u"Characters", "data": u"foo" + spaceCharacters},
69+
{"type": u"EndTag", "name": u"p", "data": []}],
70+
[{"type": u"StartTag", "name": u"p", "data": []},
71+
{"type": u"Characters", "data": u"foo "},
72+
{"type": u"EndTag", "name": u"p", "data": []}])
73+
74+
def testWhitespace(self):
75+
self.runTest(
76+
[{"type": u"StartTag", "name": u"p", "data": []},
77+
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
78+
{"type": u"EndTag", "name": u"p", "data": []}],
79+
[{"type": u"StartTag", "name": u"p", "data": []},
80+
{"type": u"Characters", "data": u"foo bar"},
81+
{"type": u"EndTag", "name": u"p", "data": []}])
82+
83+
def testLeadingWhitespaceInPre(self):
84+
self.runTestUnmodifiedOutput(
85+
[{"type": u"StartTag", "name": u"pre", "data": []},
86+
{"type": u"SpaceCharacters", "data": spaceCharacters},
87+
{"type": u"Characters", "data": u"foo"},
88+
{"type": u"EndTag", "name": u"pre", "data": []}])
89+
90+
def testLeadingWhitespaceAsCharactersInPre(self):
91+
self.runTestUnmodifiedOutput(
92+
[{"type": u"StartTag", "name": u"pre", "data": []},
93+
{"type": u"Characters", "data": spaceCharacters + u"foo"},
94+
{"type": u"EndTag", "name": u"pre", "data": []}])
95+
96+
def testTrailingWhitespaceInPre(self):
97+
self.runTestUnmodifiedOutput(
98+
[{"type": u"StartTag", "name": u"pre", "data": []},
99+
{"type": u"Characters", "data": u"foo"},
100+
{"type": u"SpaceCharacters", "data": spaceCharacters},
101+
{"type": u"EndTag", "name": u"pre", "data": []}])
102+
103+
def testTrailingWhitespaceAsCharactersInPre(self):
104+
self.runTestUnmodifiedOutput(
105+
[{"type": u"StartTag", "name": u"pre", "data": []},
106+
{"type": u"Characters", "data": u"foo" + spaceCharacters},
107+
{"type": u"EndTag", "name": u"pre", "data": []}])
108+
109+
def testWhitespaceInPre(self):
110+
self.runTestUnmodifiedOutput(
111+
[{"type": u"StartTag", "name": u"pre", "data": []},
112+
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
113+
{"type": u"EndTag", "name": u"pre", "data": []}])
114+
115+
def main():
116+
unittest.main()
117+
118+
if __name__ == "__main__":
119+
main()

0 commit comments

Comments
 (0)