Skip to content

Commit 5be8620

Browse files
committed
Change the format of attributes emitted by the treewalkers to be a list of dicts with "namespace", "name", and "value" keys, moving towards foreign content support in the serializer…
1 parent f21a519 commit 5be8620

File tree

9 files changed

+81
-23
lines changed

9 files changed

+81
-23
lines changed

html5lib/filters/inject_meta_charset.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,14 @@ def __iter__(self):
2121
# replace charset with actual encoding
2222
has_http_equiv_content_type = False
2323
content_index = -1
24-
for i,(name,value) in enumerate(token["data"]):
25-
if name.lower() == 'charset':
26-
token["data"][i] = (u'charset', self.encoding)
24+
for i,attr in enumerate(token["data"]):
25+
namespace = attr["namespace"]
26+
name = attr["name"]
27+
value = attr["value"]
28+
if namespace != None:
29+
continue
30+
elif name.lower() == 'charset':
31+
token["data"][i]["value"] = self.encoding
2732
meta_found = True
2833
break
2934
elif name == 'http-equiv' and value.lower() == 'content-type':
@@ -32,15 +37,15 @@ def __iter__(self):
3237
content_index = i
3338
else:
3439
if has_http_equiv_content_type and content_index >= 0:
35-
token["data"][content_index] = (u'content', u'text/html; charset=%s' % self.encoding)
40+
token["data"][content_index]["value"] = u'text/html; charset=%s' % self.encoding
3641
meta_found = True
3742

3843
elif token["name"].lower() == "head" and not meta_found:
3944
# insert meta into empty head
4045
yield {"type": "StartTag", "name": "head",
4146
"data": token["data"]}
4247
yield {"type": "EmptyTag", "name": "meta",
43-
"data": [["charset", self.encoding]]}
48+
"data": [{"namespace": None, "name": "charset", "value": self.encoding}]}
4449
yield {"type": "EndTag", "name": "head"}
4550
meta_found = True
4651
continue
@@ -51,7 +56,7 @@ def __iter__(self):
5156
yield pending.pop(0)
5257
if not meta_found:
5358
yield {"type": "EmptyTag", "name": "meta",
54-
"data": [["charset", self.encoding]]}
59+
"data": [{"namespace": None, "name": "charset", "value": self.encoding}]}
5560
while pending:
5661
yield pending.pop(0)
5762
meta_found = True

html5lib/serializer/htmlserializer.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,10 @@ def serialize(self, treewalker, encoding=None):
224224
attrs = attrs.items()
225225
attrs.sort()
226226
attributes = []
227-
for k,v in attrs:
227+
for attr in attrs:
228+
#TODO: Add namespace support here
229+
k = attr["name"]
230+
v = attr["value"]
228231
if encoding:
229232
k = k.encode(encoding, "strict")
230233
attributes.append(' ')

html5lib/tests/test_treewalkers.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -196,11 +196,17 @@ def convertTokens(tokens):
196196
indent += 2
197197
attrs = token["data"]
198198
if attrs:
199-
if hasattr(attrs, "items"):
200-
attrs = attrs.items()
201-
attrs.sort()
202-
for name, value in attrs:
203-
output.append(u"%s%s=\"%s\"" % (" "*indent, name, value))
199+
attrs.sort(lambda a,b: cmp(a["name"], b["name"]))
200+
for attr in attrs:
201+
if attr["namespace"]:
202+
if attr["namespace"] in constants.prefixes:
203+
name = constants.prefixes[attr["namespace"]]
204+
else:
205+
name = attr["namespace"]
206+
name += u" " + attr["name"]
207+
else:
208+
name = attr["name"]
209+
output.append(u"%s%s=\"%s\"" % (" "*indent, name, attr["value"]))
204210
if type == "EmptyTag":
205211
indent -= 2
206212
elif type == "EndTag":

html5lib/treewalkers/_base.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,11 @@ def error(self, msg):
1717
def normalizeAttrs(self, attrs):
1818
if not attrs:
1919
attrs = []
20-
elif hasattr(attrs, 'items'):
21-
attrs = attrs.items()
22-
return [(unicode(name),unicode(value)) for name,value in attrs]
20+
for attr in attrs:
21+
attr["namespace"] = unicode(attr["namespace"]) if attr["namespace"] else None
22+
attr["name"] = unicode(attr["name"])
23+
attr["value"] = unicode(attr["value"])
24+
return attrs
2325

2426
def emptyTag(self, namespace, name, attrs, hasChildren=False):
2527
yield {"type": "EmptyTag", "name": unicode(name),

html5lib/treewalkers/dom.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,14 @@ def getNodeDetails(self, node):
1515
return _base.TEXT, node.nodeValue
1616

1717
elif node.nodeType == Node.ELEMENT_NODE:
18+
attrs = []
19+
for attr in node.attributes.keys():
20+
attr = node.getAttributeNode(attr)
21+
attrs.append({"namespace": attr.namespaceURI,
22+
"name": attr.localName,
23+
"value": attr.value})
1824
return (_base.ELEMENT, node.namespaceURI, node.nodeName,
19-
node.attributes.items(), node.hasChildNodes)
25+
attrs, node.hasChildNodes)
2026

2127
elif node.nodeType == Node.COMMENT_NODE:
2228
return _base.COMMENT, node.nodeValue

html5lib/treewalkers/etree.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,8 +69,19 @@ def getNodeDetails(self, node):
6969
else:
7070
namespace = None
7171
tag = node.tag
72+
attrs = []
73+
for name, value in node.attrib.items():
74+
match = tag_regexp.match(name)
75+
if match:
76+
attrs.append({"namespace": match.group(1),
77+
"name": match.group(2),
78+
"value": value})
79+
else:
80+
attrs.append({"namespace": None,
81+
"name": name,
82+
"value": value})
7283
return (_base.ELEMENT, namespace, tag,
73-
node.attrib.items(), len(node) or node.text)
84+
attrs, len(node) or node.text)
7485

7586
def getFirstChild(self, node):
7687
if isinstance(node, tuple):

html5lib/treewalkers/lxmletree.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -140,10 +140,19 @@ def getNodeDetails(self, node):
140140
else:
141141
namespace = None
142142
tag = node.tag
143+
attrs = []
144+
for name, value in node.attrib.items():
145+
match = tag_regexp.match(name)
146+
if match:
147+
attrs.append({"namespace": match.group(1),
148+
"name": match.group(2),
149+
"value": value})
150+
else:
151+
attrs.append({"namespace": None,
152+
"name": name,
153+
"value": value})
143154
return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
144-
[(self.filter.fromXmlName(name), value) for
145-
name,value in node.attrib.iteritems()],
146-
len(node) > 0 or node.text)
155+
attrs, len(node) > 0 or node.text)
147156

148157
def getFirstChild(self, node):
149158
assert not isinstance(node, tuple), _("Text nodes have no children")

html5lib/treewalkers/pulldom.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,14 +30,20 @@ def tokens(self, event, next):
3030
if type == START_ELEMENT:
3131
name = node.nodeName
3232
namespace = node.namespaceURI
33+
attrs = []
34+
for attr in node.attributes.keys():
35+
attr = node.getAttributeNode(attr)
36+
attrs.append({"namespace": attr.namespaceURI,
37+
"name": attr.localName,
38+
"value": attr.value})
3339
if name in voidElements:
3440
for token in self.emptyTag(namespace,
3541
name,
36-
node.attributes.items(),
42+
attrs,
3743
not next or next[1] is not node):
3844
yield token
3945
else:
40-
yield self.startTag(namespace, name, node.attributes.items())
46+
yield self.startTag(namespace, name, attrs)
4147

4248
elif type == END_ELEMENT:
4349
name = node.nodeName

html5lib/treewalkers/simpletree.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,18 @@ def getNodeDetails(self, node):
3232
return _base.TEXT, node.value
3333

3434
elif node.type == 5: # Element
35+
attrs = []
36+
for name, value in node.attributes.items():
37+
if isinstance(name, tuple):
38+
attrs.append({"namespace": name[2],
39+
"name": name[1],
40+
"value": value})
41+
else:
42+
attrs.append({"namespace": None,
43+
"name": name,
44+
"value": value})
3545
return (_base.ELEMENT, node.namespace, node.name,
36-
node.attributes.items(), node.hasContent())
46+
attrs, node.hasContent())
3747

3848
elif node.type == 6: # CommentNode
3949
return _base.COMMENT, node.data

0 commit comments

Comments
 (0)