|
| 1 | +import sys |
| 2 | +import os |
| 3 | +import glob |
| 4 | +import StringIO |
| 5 | +import unittest |
| 6 | +import new |
| 7 | + |
# XXX Allow us to import the sibling module: run from this file's own
# directory and put the neighbouring "src" directory first on sys.path.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
| 11 | + |
| 12 | +import parser |
| 13 | +import treebuilders |
| 14 | + |
def parseTestcase(testString):
    """Split one test case into its data, expected-document and error parts.

    testString is the text of a single test. Its first line must be "#data";
    the lines "#errors" and "#document" switch which section the following
    lines are collected into.

    Returns a tuple (input, output, errors) where input and output are the
    collected "#data" and "#document" lines joined with newlines, and errors
    is the list of lines collected after "#errors".

    Notes on what is discarded:
    - empty lines are dropped from every section;
    - any line that starts with "#data", "#errors" or "#document" is never
      stored (only the exact lines "#errors" / "#document" switch section);
    - in the "#document" section, lines starting with "|" lose their first
      two characters.

    Raises AssertionError if the first line is not "#data".
    """
    lines = testString.split("\n")
    if lines[0] != "#data":
        # Raised explicitly instead of via ``assert`` so the check still
        # runs under ``python -O``; the offending lines go in the message.
        raise AssertionError(
            "test case must start with '#data', got: %r" % (lines,))

    data = []
    output = []
    errors = []
    current = data
    for line in lines:
        if line == "#errors":
            current = errors
        elif line == "#document":
            current = output
        elif not line or line.startswith(("#data", "#errors", "#document")):
            # Blank lines and section-marker-like lines are never stored.
            continue
        elif current is output and line.startswith("|"):
            # Strip the leading "| " used by the expected-tree format.
            current.append(line[2:])
        else:
            current.append(line)
    return "\n".join(data), "\n".join(output), errors
| 42 | + |
def convertTreeDump(treedump):
    """Convert the output of str(document) to the format used in the testcases.

    The first line of treedump is discarded. Every remaining line that
    starts with "|" has its first three characters removed; other lines
    are kept unchanged. The result is joined back with newlines.
    """
    def stripMarker(row):
        if row.startswith("|"):
            return row[3:]
        return row

    rows = treedump.split("\n")
    del rows[0]
    return "\n".join(map(stripMarker, rows))
| 53 | + |
class TestCase(unittest.TestCase):
    """unittest case that checks parser output against an expected tree."""

    def runParserTest(self, input, output, errors):
        """Parse input and assert that the serialized tree equals output.

        input  -- markup text fed to the parser
        output -- expected tree dump, in the testcase format
        errors -- expected parse errors; accepted but not checked yet
                  (see the disabled assertion at the end)
        """
        # XXX - move this out into the setup function
        from treebuilders import etree
        treebuilder = etree.TreeBuilder

        p = parser.HTMLParser(tree=treebuilder)
        document = p.parse(StringIO.StringIO(input))

        # Serialize once and reuse the result for both the failure message
        # and the comparison itself.
        received = convertTreeDump(p.tree.testSerializer(document))
        errorMsg = "\n".join(["\n\nExpected:", output, "\nRecieved:",
                              received])
        self.assertEqual(output, received, errorMsg)

        # TODO: error-count checking is disabled for now.
        #errorMsg2 = "\n".join(["\n\nInput errors:\n" + "\n".join(errors),
        #    "Actual errors:\n" + "\n".join(p.errors)])
        #self.assertEquals(len(p.errors), len(errors), errorMsg2)
| 71 | + |
def test_parser():
    """Yield one test tuple per test case found in tree-construction/*.dat.

    Each file is split on "#data\\n"; every non-empty piece is parsed with
    parseTestcase and yielded as
    (TestCase.runParserTest, input, output, errors).

    Files are visited in sorted order so the sequence of yielded tests is
    the same on every run (glob itself makes no ordering promise).
    """
    for filename in sorted(glob.glob('tree-construction/*.dat')):
        f = open(filename)
        try:
            contents = f.read()
        finally:
            # Close the handle promptly instead of leaking it until GC.
            f.close()
        for test in contents.split("#data\n"):
            if test == "":
                continue
            # split() consumed the marker; put it back for parseTestcase.
            test = "#data\n" + test
            input, output, errors = parseTestcase(test)
            yield TestCase.runParserTest, input, output, errors
| 82 | + |
def buildTestSuite():
    """Attach one test method per parsed test case to TestCase.

    For every tuple produced by test_parser() a method named test1, test2,
    ... is set on TestCase; its docstring is 'Parser <name>: <input>'.

    Returns the suite that unittest.TestLoader builds from TestCase.
    """
    def makeTest(method, data, expected, errors):
        # A factory gives each generated test its own bound values, avoiding
        # the late-binding closure problem without default-argument tricks.
        def testFunc(self):
            return method(self, data, expected, errors)
        return testFunc

    for index, (func, input, output, errors) in enumerate(test_parser()):
        testName = 'test%d' % (index + 1)
        testFunc = makeTest(func, input, output, errors)
        testFunc.__name__ = testName
        testFunc.__doc__ = 'Parser %s: %s' % (testName, input)
        # A plain function stored on a class is already a method, so the
        # deprecated new.instancemethod() wrapper is not needed.
        setattr(TestCase, testName, testFunc)
    return unittest.TestLoader().loadTestsFromTestCase(TestCase)
| 94 | + |
def main():
    """Generate the test methods on TestCase, then run unittest's CLI."""
    # Called for its side effect of populating TestCase; the returned suite
    # is not used here.
    buildTestSuite()
    unittest.main()
| 98 | + |
if __name__ == "__main__":
    # The working directory and sys.path are already set up unconditionally
    # near the top of this module, so there is nothing to repeat here
    # (repeating it only inserted the same "src" path a second time).
    main()
0 commit comments