Skip to content

Commit 8c4b7c3

Browse files
committed
Add support for initial content model flags in test framework and tests, courtesy of Thomas Broyer
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40410
1 parent 91ae089 commit 8c4b7c3

File tree

1 file changed

+36
-17
lines changed

1 file changed

+36
-17
lines changed

tests/test_tokenizer.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,24 @@
1212
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
1313

1414
from tokenizer import HTMLTokenizer
15+
import constants
1516

1617
class TokenizerTestParser(object):
18+
def __init__(self, contentModelFlag, lastStartTag=None):
19+
self.tokenizer = HTMLTokenizer
20+
self._contentModelFlag = constants.contentModelFlags[contentModelFlag]
21+
self._lastStartTag = lastStartTag
22+
1723
def parse(self, stream, innerHTML=False):
24+
tokenizer = self.tokenizer(stream)
1825
self.outputTokens = []
1926

20-
self.tokenizer = HTMLTokenizer(stream)
27+
tokenizer.contentModelFlag = self._contentModelFlag
28+
if self._lastStartTag is not None:
29+
tokenizer.currentToken = {"type": "startTag",
30+
"name":self._lastStartTag}
2131

22-
for token in self.tokenizer:
32+
for token in tokenizer:
2333
getattr(self, 'process%s' % token["type"])(token)
2434

2535
return self.outputTokens
@@ -82,14 +92,20 @@ def tokensMatch(expectedTokens, recievedTokens):
8292

8393

8494
class TestCase(unittest.TestCase):
85-
def runTokenizerTest(self, input, output):
95+
def runTokenizerTest(self, test):
8696
#XXX - move this out into the setup function
8797
#concatenate all consecutive character tokens into a single token
88-
output = concatenateCharacterTokens(output)
89-
parser = TokenizerTestParser()
90-
tokens = parser.parse(input)
98+
output = concatenateCharacterTokens(test['output'])
99+
if 'lastStartTag' not in test:
100+
test['lastStartTag'] = None
101+
parser = TokenizerTestParser(test['contentModelFlag'],
102+
test['lastStartTag'])
103+
104+
tokens = parser.parse(test['input'])
91105
tokens = concatenateCharacterTokens(tokens)
92-
errorMsg = "\n".join(["\n\nExpected:", str(output), "\nRecieved:",
106+
errorMsg = "\n".join(["\n\nContent Model Flag:",
107+
test['contentModelFlag'] ,
108+
"\nExpected:", str(output), "\nRecieved:",
93109
str(tokens)])
94110
self.assertEquals(tokensMatch(tokens, output), True, errorMsg)
95111

@@ -98,19 +114,22 @@ def test_tokenizer():
98114
for filename in glob.glob('tokenizer/*.test'):
99115
tests = simplejson.load(file(filename))
100116
for test in tests['tests']:
101-
yield (TestCase.runTokenizerTest, test['description'],
102-
test['input'], test['output'])
117+
yield (TestCase.runTokenizerTest, test)
103118

104119
def buildTestSuite():
105120
tests = 0
106-
for func, desc, input, output in test_tokenizer():
107-
tests += 1
108-
testName = 'test%d' % tests
109-
testFunc = lambda self, method=func, input=input, output=output: \
110-
method(self, input, output)
111-
testFunc.__doc__ = "\t".join([desc, str(input), str(output)])
112-
instanceMethod = new.instancemethod(testFunc, None, TestCase)
113-
setattr(TestCase, testName, instanceMethod)
121+
for func, test in test_tokenizer():
122+
if 'contentModelFlags' not in test:
123+
test["contentModelFlags"] = ["PCDATA"]
124+
for contentModelFlag in test["contentModelFlags"]:
125+
tests += 1
126+
testName = 'test%d' % tests
127+
test["contentModelFlag"] = contentModelFlag
128+
testFunc = lambda self, method=func, test=test: \
129+
method(self, test)
130+
testFunc.__doc__ = "\t".join([test['description'], str(test['input'])])
131+
instanceMethod = new.instancemethod(testFunc, None, TestCase)
132+
setattr(TestCase, testName, instanceMethod)
114133
return unittest.TestLoader().loadTestsFromTestCase(TestCase)
115134

116135
def main():

0 commit comments

Comments (0)