|
1 | 1 | from __future__ import absolute_import, division, unicode_literals
|
2 | 2 |
|
| 3 | +from genshi.core import QName |
3 | 4 | from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
|
4 | 5 | from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
|
5 | 6 | from genshi.output import NamespaceFlattener
|
6 | 7 |
|
7 | 8 | from . import _base
|
8 | 9 |
|
9 |
| -from html5lib.constants import voidElements |
| 10 | +from html5lib.constants import voidElements, namespaces |
10 | 11 |
|
11 | 12 | class TreeWalker(_base.TreeWalker):
|
12 | 13 | def __iter__(self):
|
13 |
| - depth = 0 |
14 |
| - ignore_until = None |
| 14 | + # Buffer the events so we can pass in the following one |
15 | 15 | previous = None
|
16 | 16 | for event in self.tree:
|
17 | 17 | if previous is not None:
|
18 |
| - if previous[0] == START: |
19 |
| - depth += 1 |
20 |
| - if ignore_until <= depth: |
21 |
| - ignore_until = None |
22 |
| - if ignore_until is None: |
23 |
| - for token in self.tokens(previous, event): |
24 |
| - yield token |
25 |
| - if token["type"] == "EmptyTag": |
26 |
| - ignore_until = depth |
27 |
| - if previous[0] == END: |
28 |
| - depth -= 1 |
| 18 | + for token in self.tokens(previous, event): |
| 19 | + yield token |
29 | 20 | previous = event
|
| 21 | + |
| 22 | + # Don't forget the final event! |
30 | 23 | if previous is not None:
|
31 |
| - if ignore_until is None or ignore_until <= depth: |
32 |
| - for token in self.tokens(previous, None): |
33 |
| - yield token |
34 |
| - elif ignore_until is not None: |
35 |
| - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") |
| 24 | + for token in self.tokens(previous, None): |
| 25 | + yield token |
36 | 26 |
|
37 | 27 | def tokens(self, event, next):
|
38 | 28 | kind, data, pos = event
|
39 | 29 | if kind == START:
|
40 |
| - tag, attrib = data |
| 30 | + tag, attribs = data |
41 | 31 | name = tag.localname
|
42 | 32 | namespace = tag.namespace
|
43 |
| - if tag in voidElements: |
44 |
| - for token in self.emptyTag(namespace, name, list(attrib), |
| 33 | + converted_attribs = {} |
| 34 | + for k, v in attribs: |
| 35 | + if isinstance(k, QName): |
| 36 | + converted_attribs[(k.namespace, k.localname)] = v |
| 37 | + else: |
| 38 | + converted_attribs[(None, k)] = v |
| 39 | + |
| 40 | + if namespace == namespaces["html"] and name in voidElements: |
| 41 | + for token in self.emptyTag(namespace, name, converted_attribs, |
45 | 42 | not next or next[0] != END
|
46 | 43 | or next[1] != tag):
|
47 | 44 | yield token
|
48 | 45 | else:
|
49 |
| - yield self.startTag(namespace, name, list(attrib)) |
| 46 | + yield self.startTag(namespace, name, converted_attribs) |
50 | 47 |
|
51 | 48 | elif kind == END:
|
52 | 49 | name = data.localname
|
|
0 commit comments