Skip to content

Commit c21a84c

Browse files
committed
Fix the tokenizer test harness
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401148
1 parent d2ba0c0 commit c21a84c

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

tests/test_tokenizer.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
1+
import sys
12
import os
23
import unittest
34
from support import simplejson, html5lib_test_files
45

56
from html5lib.tokenizer import HTMLTokenizer
67
from html5lib import constants
78

9+
import cStringIO
10+
811
class TokenizerTestParser(object):
912
def __init__(self, contentModelFlag, lastStartTag=None):
1013
self.tokenizer = HTMLTokenizer
@@ -104,19 +107,29 @@ def runTokenizerTest(self, test):
104107
output = concatenateCharacterTokens(test['output'])
105108
if 'lastStartTag' not in test:
106109
test['lastStartTag'] = None
110+
outBuffer = cStringIO.StringIO()
111+
stdout = sys.stdout
112+
sys.stdout = outBuffer
107113
parser = TokenizerTestParser(test['contentModelFlag'],
108114
test['lastStartTag'])
109115
tokens = parser.parse(test['input'])
110116
tokens = concatenateCharacterTokens(tokens)
117+
tokens = normalizeTokens(tokens)
111118
errorMsg = "\n".join(["\n\nContent Model Flag:",
112119
test['contentModelFlag'] ,
113-
"\nInput:", str(test['input']),
114-
"\nExpected:", str(output),
115-
"\nreceived:", str(tokens)])
116-
tokens = normalizeTokens(tokens)
120+
"\nInput:", test['input'],
121+
"\nExpected:", unicode(output),
122+
"\nreceived:", unicode(tokens)])
117123
ignoreErrorOrder = test.get('ignoreErrorOrder', False)
118-
self.assertEquals(tokensMatch(tokens, output, ignoreErrorOrder), True,
119-
errorMsg)
124+
sys.stdout = stdout
125+
try:
126+
self.assertEquals(tokensMatch(tokens, output, ignoreErrorOrder), True,
127+
errorMsg)
128+
except AssertionError:
129+
outBuffer.seek(0)
130+
print outBuffer.read()
131+
print errorMsg
132+
raise
120133

121134
def buildTestSuite():
122135
for filename in html5lib_test_files('tokenizer', '*.test'):

0 commit comments

Comments (0)