Skip to content

Commit d9fc453

Browse files
author
James Graham
committed
Sort attributes in treebuilders rather than using a regexp
1 parent 1d3e26b commit d9fc453

File tree

6 files changed

+29
-31
lines changed

6 files changed

+29
-31
lines changed

html5lib/tests/test_parser.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import StringIO
55
import unittest
66
import warnings
7+
import re
78

89
warnings.simplefilter("error")
910

@@ -67,12 +68,6 @@
6768
def convertTreeDump(data):
6869
return "\n".join(convert(3)(data).split("\n")[1:])
6970

70-
import re
71-
attrlist = re.compile(r"^(\s+)\w+(?:\s\w+)?=.*(?:\n\1\w+(?:\s\w+)?=.*)+",re.M)
72-
def sortattrs(x):
73-
lines = x.group(0).split("\n")
74-
lines.sort()
75-
return "\n".join(lines)
7671
namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
7772

7873
class TestCase(unittest.TestCase):
@@ -100,10 +95,8 @@ def runParserTest(self, innerHTML, input, expected, errors, treeClass,
10095
self.assertTrue(False, errorMsg.encode("utf8"))
10196

10297
output = convertTreeDump(p.tree.testSerializer(document))
103-
output = attrlist.sub(sortattrs, output)
10498

10599
expected = convertExpected(expected)
106-
expected = attrlist.sub(sortattrs, expected)
107100
if namespaceHTMLElements:
108101
expected = namespaceExpected(r"\1<html \2>", expected)
109102

@@ -125,8 +118,6 @@ def buildTestSuite():
125118

126119
for treeName, treeCls in treeTypes.iteritems():
127120
files = html5lib_test_files('tree-construction')
128-
#files = [f for f in files if
129-
# not f.split(".")[-2][-2:] in ("s9", "10", "11", "12")] #skip namespace tests for now
130121
for filename in files:
131122
testName = os.path.basename(filename).replace(".dat","")
132123

@@ -151,7 +142,6 @@ def testFunc(self, innerHTML=innerHTML, input=input,
151142
setattr(TestCase, testFunc.__name__,
152143
testFunc)
153144
break
154-
break
155145

156146
return unittest.TestLoader().loadTestsFromTestCase(TestCase)
157147

html5lib/treebuilders/dom.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -199,19 +199,19 @@ def serializeElement(element, indent=0):
199199
name = element.nodeName
200200
rv.append("|%s<%s>"%(' '*indent, name))
201201
if element.hasAttributes():
202-
i = 0
203-
attr = element.attributes.item(i)
204-
while attr:
202+
attributes = []
203+
for i in range(len(element.attributes)):
204+
attr = element.attributes.item(i)
205205
name = attr.nodeName
206206
value = attr.value
207207
ns = attr.namespaceURI
208208
if ns:
209209
name = "%s %s"%(constants.prefixes[ns], attr.localName)
210210
else:
211211
name = attr.nodeName
212-
i += 1
213-
attr = element.attributes.item(i)
212+
attributes.append((name, value))
214213

214+
for name, value in sorted(attributes):
215215
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
216216
indent += 2
217217
for child in element.childNodes:

html5lib/treebuilders/etree.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,12 +231,18 @@ def serializeElement(element, indent=0):
231231
rv.append("|%s<%s>"%(' '*indent, name))
232232

233233
if hasattr(element, "attrib"):
234+
attributes = []
234235
for name, value in element.attrib.iteritems():
235236
nsmatch = tag_regexp.match(name)
236237
if nsmatch is not None:
237238
ns, name = nsmatch.groups()
238239
prefix = constants.prefixes[ns]
239-
name = "%s %s"%(prefix, name)
240+
attr_string = "%s %s"%(prefix, name)
241+
else:
242+
attr_string = name
243+
attributes.append((attr_string, value))
244+
245+
for name, value in sorted(attributes):
240246
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
241247
if element.text:
242248
rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))

html5lib/treebuilders/etree_lxml.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
pass
1515

1616
fullTree = True
17+
tag_regexp = re.compile("{([^}]*)}(.*)")
1718

1819
"""Module for supporting the lxml.etree library. The idea here is to use as much
1920
of the native library as possible, without using fragile hacks like custom element
@@ -93,20 +94,21 @@ def serializeElement(element, indent=0):
9394
filter.fromXmlName(element.tag)))
9495

9596
if hasattr(element, "attrib"):
97+
attributes = []
9698
for name, value in element.attrib.iteritems():
97-
nsmatch = etree_builders.tag_regexp.match(name)
98-
if nsmatch:
99-
ns = nsmatch.group(1)
100-
name = nsmatch.group(2)
99+
nsmatch = tag_regexp.match(name)
100+
if nsmatch is not None:
101+
ns, name = nsmatch.groups()
102+
name = filter.fromXmlName(name)
101103
prefix = constants.prefixes[ns]
102-
rv.append('|%s%s %s="%s"' % (' '*(indent+2),
103-
prefix,
104-
filter.fromXmlName(name),
105-
value))
106-
else:
107-
rv.append('|%s%s="%s"' % (' '*(indent+2),
108-
filter.fromXmlName(name),
109-
value))
104+
attr_string = "%s %s"%(prefix, name)
105+
else:
106+
attr_string = filter.fromXmlName(name)
107+
attributes.append((attr_string, value))
108+
109+
for name, value in sorted(attributes):
110+
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
111+
110112
if element.text:
111113
rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
112114
indent += 2

html5lib/treebuilders/simpletree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def printTree(self, indent):
203203
tree = '\n|%s%s' % (' '*indent, unicode(self))
204204
indent += 2
205205
if self.attributes:
206-
for name, value in self.attributes.iteritems():
206+
for name, value in sorted(self.attributes.iteritems()):
207207
if isinstance(name, tuple):
208208
name = "%s %s"%(name[0], name[1])
209209
tree += '\n|%s%s="%s"' % (' ' * indent, name, value)

html5lib/treebuilders/soup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def serializeElement(element, indent=0):
217217
else:
218218
rv.append("|%s<%s>"%(' '*indent, element.name))
219219
if element.attrs:
220-
for name, value in element.attrs:
220+
for name, value in sorted(element.attrs):
221221
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
222222
indent += 2
223223
if hasattr(element, "contents"):

0 commit comments

Comments
 (0)