Skip to content

Commit af96a74

Browse files
committed
Fix several treewalker issues including the lack of end tag tokens in the elementtree treewalker
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40991
1 parent a83fbe4 commit af96a74

File tree

4 files changed

+37
-19
lines changed

4 files changed

+37
-19
lines changed

src/html5lib/treewalkers/_base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def __iter__(self):
103103
details = self.getNodeDetails(currentNode)
104104
type, details = details[0], details[1:]
105105
hasChildren = False
106+
endTag = None
106107

107108
if type == DOCTYPE:
108109
yield self.doctype(*details)
@@ -118,6 +119,7 @@ def __iter__(self):
118119
yield token
119120
hasChildren = False
120121
else:
122+
endTag = name
121123
yield self.startTag(name, attributes)
122124

123125
elif type == COMMENT:

src/html5lib/treewalkers/etree.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -78,19 +78,20 @@ def getFirstChild(self, node):
7878
return (node, 0, parents)
7979

8080
def getNextSibling(self, node):
81-
assert isinstance(node, tuple), "Node is not a tuple: " + str(node)
82-
83-
elt, key, parents = node
84-
if key == "text":
85-
key = -1
86-
elif key == "tail":
87-
elt, key = parents.pop()
81+
if isinstance(node, tuple):
82+
elt, key, parents = node
83+
if key == "text":
84+
key = -1
85+
elif key == "tail":
86+
elt, key = parents.pop()
87+
else:
88+
# Look for "tail" of the "revisited" node
89+
child = elt[key]
90+
if child.tail:
91+
parents.append((elt, key))
92+
return (child, "tail", parents)
8893
else:
89-
# Look for "tail" of the "revisited" node
90-
child = elt[key]
91-
if child.tail:
92-
parents.append((elt, key))
93-
return (child, "tail", parents)
94+
return None
9495

9596
# case where key was "text" or "tail", or elt[key] had a tail
9697
key += 1
@@ -106,7 +107,6 @@ def getParentNode(self, node):
106107
elt, key = parents.pop()
107108
return elt, key, parents
108109
else:
109-
# HACK: We could return ``elt`` but None will stop the algorithm the same way
110-
return None
110+
return elt
111111

112112
return locals()

src/html5lib/treewalkers/genshistream.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, \
1+
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT, \
22
START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
33
from genshi.output import NamespaceFlattener
44

@@ -59,7 +59,7 @@ def tokens(self, event, next):
5959
elif kind == DOCTYPE:
6060
yield self.doctype(*data)
6161

62-
elif kind in (XML_DECL, DOCTYPE, START_NS, END_NS, \
62+
elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, \
6363
START_CDATA, END_CDATA, PI):
6464
pass
6565

tests/test_treewalkers.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,19 +208,35 @@ def runTest(self, innerHTML, input, expected, errors, treeClass):
208208
document = p.parse(StringIO.StringIO(input))
209209
document = treeClass.get("adapter", lambda x: x)(document)
210210
try:
211-
output = convertTokens(LintFilter(treeClass["walker"](document)))
211+
output = convertTokens(treeClass["walker"](document))
212212
output = attrlist.sub(sortattrs, output)
213213
expected = attrlist.sub(sortattrs, convertExpected(expected))
214214
self.assertEquals(expected, output, "\n".join([
215215
"", "Input:", input,
216216
"", "Expected:", expected,
217217
"", "Recieved:", output
218218
]))
219-
except LintError, le:
220-
self.fail(input + "\n" + le.message)
221219
except NotImplementedError:
222220
pass # Amnesty for those that confess...
223221

222+
class TokenTestCase(unittest.TestCase):
223+
def test_all_tokens(self):
224+
expected = [
225+
{'data': [], 'type': 'StartTag', 'name': u'html'},
226+
{'data': [], 'type': 'StartTag', 'name': u'head'},
227+
{'data': [], 'type': 'EndTag', 'name': u'head'},
228+
{'data': [], 'type': 'StartTag', 'name': u'body'},
229+
{'data': [], 'type': 'EndTag', 'name': u'body'},
230+
{'data': [], 'type': 'EndTag', 'name': u'html'}]
231+
for treeName, treeCls in treeTypes.iteritems():
232+
p = html5parser.HTMLParser(tree = treeCls["builder"])
233+
document = p.parse("<html></html>")
234+
document = treeCls.get("adapter", lambda x: x)(document)
235+
output = treeCls["walker"](document)
236+
for expectedToken, outputToken in zip(expected, output):
237+
self.assertEquals(expectedToken, outputToken)
238+
239+
224240
def buildTestSuite():
225241
sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")
226242

0 commit comments

Comments
 (0)