sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))

from tokenizer import HTMLTokenizer
+import constants

class TokenizerTestParser(object):
+    def __init__(self, contentModelFlag, lastStartTag=None):
+        self.tokenizer = HTMLTokenizer
+        self._contentModelFlag = constants.contentModelFlags[contentModelFlag]
+        self._lastStartTag = lastStartTag
+
    def parse(self, stream, innerHTML=False):
+        tokenizer = self.tokenizer(stream)
        self.outputTokens = []

-        self.tokenizer = HTMLTokenizer(stream)
+        tokenizer.contentModelFlag = self._contentModelFlag
+        if self._lastStartTag is not None:
+            tokenizer.currentToken = {"type": "startTag",
+                                      "name": self._lastStartTag}

-        for token in self.tokenizer:
+        for token in tokenizer:
            getattr(self, 'process%s' % token["type"])(token)

        return self.outputTokens
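For orientation, a minimal sketch of how the reworked TokenizerTestParser is driven after this change: the content model flag is fixed at construction time, and each parse() call now builds a fresh tokenizer rather than reusing one stored on the parser. The flag, tag name, and input below are invented for illustration; "RCDATA" assumes it is a key of constants.contentModelFlags (the diff only confirms "PCDATA").

    # Tokenize as RCDATA, as though a <textarea> start tag had just been seen.
    parser = TokenizerTestParser("RCDATA", lastStartTag="textarea")
    tokens = parser.parse("foo</textarea>")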
@@ -82,14 +92,20 @@ def tokensMatch(expectedTokens, recievedTokens):

class TestCase(unittest.TestCase):
-    def runTokenizerTest(self, input, output):
+    def runTokenizerTest(self, test):
        #XXX - move this out into the setup function
        #concatenate all consecutive character tokens into a single token
-        output = concatenateCharacterTokens(output)
-        parser = TokenizerTestParser()
-        tokens = parser.parse(input)
+        output = concatenateCharacterTokens(test['output'])
+        if 'lastStartTag' not in test:
+            test['lastStartTag'] = None
+        parser = TokenizerTestParser(test['contentModelFlag'],
+                                     test['lastStartTag'])
+
+        tokens = parser.parse(test['input'])
        tokens = concatenateCharacterTokens(tokens)
-        errorMsg = "\n".join(["\n\nExpected:", str(output), "\nRecieved:",
+        errorMsg = "\n".join(["\n\nContent Model Flag:",
+                              test['contentModelFlag'],
+                              "\nExpected:", str(output), "\nRecieved:",
                              str(tokens)])
        self.assertEquals(tokensMatch(tokens, output), True, errorMsg)
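runTokenizerTest() now receives the whole test dictionary rather than unpacked input/output arguments. Judging from the keys read here and in buildTestSuite() below, a single entry in a tokenizer/*.test file plausibly has the shape sketched next; the concrete values are invented, 'lastStartTag' and 'contentModelFlags' are optional, and the output token serialization is elided rather than guessed.

    {'description': 'Text in RCDATA',
     'input': 'foo',
     'output': [...],
     'contentModelFlags': ['RCDATA'],
     'lastStartTag': 'textarea'}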
@@ -98,19 +114,22 @@ def test_tokenizer():
    for filename in glob.glob('tokenizer/*.test'):
        tests = simplejson.load(file(filename))
        for test in tests['tests']:
-            yield (TestCase.runTokenizerTest, test['description'],
-                   test['input'], test['output'])
+            yield (TestCase.runTokenizerTest, test)

def buildTestSuite():
    tests = 0
-    for func, desc, input, output in test_tokenizer():
-        tests += 1
-        testName = 'test%d' % tests
-        testFunc = lambda self, method=func, input=input, output=output: \
-            method(self, input, output)
-        testFunc.__doc__ = "\t".join([desc, str(input), str(output)])
-        instanceMethod = new.instancemethod(testFunc, None, TestCase)
-        setattr(TestCase, testName, instanceMethod)
+    for func, test in test_tokenizer():
+        if 'contentModelFlags' not in test:
+            test["contentModelFlags"] = ["PCDATA"]
+        for contentModelFlag in test["contentModelFlags"]:
+            tests += 1
+            testName = 'test%d' % tests
+            test["contentModelFlag"] = contentModelFlag
+            testFunc = lambda self, method=func, test=test: \
+                method(self, test)
+            testFunc.__doc__ = "\t".join([test['description'], str(test['input'])])
+            instanceMethod = new.instancemethod(testFunc, None, TestCase)
+            setattr(TestCase, testName, instanceMethod)
    return unittest.TestLoader().loadTestsFromTestCase(TestCase)
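The default arguments in the new lambda (method=func, test=test) are the usual Python idiom for binding loop variables at definition time: a closure without them would look the variables up only when the generated test finally runs, and every test would see their final values. A minimal illustration with invented names:

    # Late binding: both callables return 1, the final value of i.
    late = [lambda: i for i in range(2)]
    # Default-argument binding: each callable keeps the i it was built with.
    bound = [lambda i=i: i for i in range(2)]

One caveat: test=test freezes the reference, not the contents, and the same dictionary is mutated with a new contentModelFlag on each pass of the inner loop, so binding a copy (test=dict(test)) would be needed for each generated test to keep its own flag.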

def main():