Skip to content

Commit 6bc4fc4

Browse files
committed
Re-enable Genshi treewalker.
The Genshi treewalker was never fully updated when namespace support was added to the treewalkers. This commit finally fixes that, allowing all tests to pass once again. It also removes the logic that ignored the children of void elements: the commonly used treewalkers don't support that behaviour, and the code contained a subtle bug that I couldn't track down. Given that most treewalkers don't support it, this seems like little loss. (You will, however, still get an error from the treewalker if a void element has children.)
1 parent 3dabe2c commit 6bc4fc4

File tree

2 files changed

+33
-32
lines changed

2 files changed

+33
-32
lines changed

html5lib/tests/test_treewalkers.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,19 @@ def GenshiAdapter(tree):
127127
name = "{%s}%s" % (token["namespace"], token["name"])
128128
else:
129129
name = token["name"]
130-
yield (START,
131-
(QName(name),
132-
Attrs([(QName(attr),value) for attr,value in token["data"]])),
133-
(None, -1, -1))
130+
attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
131+
for attr, value in token["data"].items()])
132+
yield (START, (QName(name), attrs), (None, -1, -1))
134133
if type == "EmptyTag":
135134
type = "EndTag"
136135

137136
if type == "EndTag":
138-
yield END, QName(token["name"]), (None, -1, -1)
137+
if token["namespace"]:
138+
name = "{%s}%s" % (token["namespace"], token["name"])
139+
else:
140+
name = token["name"]
141+
142+
yield END, QName(name), (None, -1, -1)
139143

140144
elif type == "Comment":
141145
yield COMMENT, token["data"], (None, -1, -1)
@@ -150,10 +154,10 @@ def GenshiAdapter(tree):
150154
if text is not None:
151155
yield TEXT, text, (None, -1, -1)
152156

153-
#treeTypes["genshi"] = \
154-
# {"builder": treebuilders.getTreeBuilder("simpletree"),
155-
# "adapter": GenshiAdapter,
156-
# "walker": treewalkers.getTreeWalker("genshi")}
157+
treeTypes["genshi"] = \
158+
{"builder": treebuilders.getTreeBuilder("simpletree"),
159+
"adapter": GenshiAdapter,
160+
"walker": treewalkers.getTreeWalker("genshi")}
157161
except ImportError:
158162
pass
159163

html5lib/treewalkers/genshistream.py

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,49 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3+
from genshi.core import QName
34
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
45
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
56
from genshi.output import NamespaceFlattener
67

78
from . import _base
89

9-
from html5lib.constants import voidElements
10+
from html5lib.constants import voidElements, namespaces
1011

1112
class TreeWalker(_base.TreeWalker):
1213
def __iter__(self):
13-
depth = 0
14-
ignore_until = None
14+
# Buffer the events so we can pass in the following one
1515
previous = None
1616
for event in self.tree:
1717
if previous is not None:
18-
if previous[0] == START:
19-
depth += 1
20-
if ignore_until <= depth:
21-
ignore_until = None
22-
if ignore_until is None:
23-
for token in self.tokens(previous, event):
24-
yield token
25-
if token["type"] == "EmptyTag":
26-
ignore_until = depth
27-
if previous[0] == END:
28-
depth -= 1
18+
for token in self.tokens(previous, event):
19+
yield token
2920
previous = event
21+
22+
# Don't forget the final event!
3023
if previous is not None:
31-
if ignore_until is None or ignore_until <= depth:
32-
for token in self.tokens(previous, None):
33-
yield token
34-
elif ignore_until is not None:
35-
raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
24+
for token in self.tokens(previous, None):
25+
yield token
3626

3727
def tokens(self, event, next):
3828
kind, data, pos = event
3929
if kind == START:
40-
tag, attrib = data
30+
tag, attribs = data
4131
name = tag.localname
4232
namespace = tag.namespace
43-
if tag in voidElements:
44-
for token in self.emptyTag(namespace, name, list(attrib),
33+
converted_attribs = {}
34+
for k, v in attribs:
35+
if isinstance(k, QName):
36+
converted_attribs[(k.namespace, k.localname)] = v
37+
else:
38+
converted_attribs[(None, k)] = v
39+
40+
if namespace == namespaces["html"] and name in voidElements:
41+
for token in self.emptyTag(namespace, name, converted_attribs,
4542
not next or next[0] != END
4643
or next[1] != tag):
4744
yield token
4845
else:
49-
yield self.startTag(namespace, name, list(attrib))
46+
yield self.startTag(namespace, name, converted_attribs)
5047

5148
elif kind == END:
5249
name = data.localname

0 commit comments

Comments
 (0)