diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py
index 4795baec..f938ba1a 100644
--- a/html5lib/filters/alphabeticalattributes.py
+++ b/html5lib/filters/alphabeticalattributes.py
@@ -8,13 +8,24 @@
from ordereddict import OrderedDict
+def _attr_key(attr):
+    """Build the sort key for a single ``(name, value)`` attribute item.
+
+    The name part is a ``(namespace, local_name)`` pair in which the namespace
+    is either ``None`` or a string. ``None`` and strings can't be ordered
+    against each other, so a missing namespace is replaced with ``''`` before
+    the pair is used for comparison.
+
+    """
+    qualified_name = attr[0]
+    namespace = qualified_name[0] or ''
+    return namespace, qualified_name[1]
+
+
class Filter(base.Filter):
# Token-stream filter: every token produced by base.Filter.__iter__ is passed
# through, and for "StartTag" / "EmptyTag" tokens token["data"] is replaced by
# an OrderedDict whose entries were inserted in sorted order.
def __iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
# The sort key moves from the raw attribute name (x[0]) to _attr_key, which
# substitutes '' for a None namespace so that names with and without a
# namespace can be compared with each other.
for name, value in sorted(token["data"].items(),
- key=lambda x: x[0]):
+ key=_attr_key):
attrs[name] = value
# The token is modified in place: its "data" now refers to the sorted mapping.
token["data"] = attrs
yield token
diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py
new file mode 100644
index 00000000..9e560a1e
--- /dev/null
+++ b/html5lib/tests/test_alphabeticalattributes.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import, division, unicode_literals
+
+try:
+ from collections import OrderedDict
+except ImportError:
+ from ordereddict import OrderedDict
+
+import pytest
+
+import html5lib
+from html5lib.filters.alphabeticalattributes import Filter
+from html5lib.serializer import HTMLSerializer
+
+
+@pytest.mark.parametrize('msg, attrs, expected_attrs', [
+    ('no attrs', {}, {}),
+    (
+        'one attr',
+        {(None, 'alt'): 'image'},
+        OrderedDict([((None, 'alt'), 'image')]),
+    ),
+    (
+        'multiple attrs',
+        {
+            (None, 'src'): 'foo',
+            (None, 'alt'): 'image',
+            (None, 'style'): 'border: 1px solid black;',
+        },
+        OrderedDict([
+            ((None, 'alt'), 'image'),
+            ((None, 'src'), 'foo'),
+            ((None, 'style'), 'border: 1px solid black;'),
+        ]),
+    ),
+])
+def test_alphabetizing(msg, attrs, expected_attrs):
+    """Attributes of a start tag come out of the filter in sorted order.
+
+    ``msg`` only labels the case; it is not used by the test body.
+    """
+    start_tag = {'type': 'StartTag', 'name': 'img', 'data': attrs}
+    filtered = list(Filter([start_tag]))
+
+    # The filter yields one token per input token, so the only result is
+    # the start tag built above.
+    assert filtered[0]['data'] == expected_attrs
+
+
+def test_with_different_namespaces():
+    """A ``None`` namespace and a string namespace can be sorted together."""
+    plain_id = (None, 'id')
+    xlink_href = ('http://www.w3.org/1999/xlink', 'href')
+    start_tag = {
+        'type': 'StartTag',
+        'name': 'pattern',
+        'data': {plain_id: 'patt1', xlink_href: '#patt2'},
+    }
+
+    sorted_attrs = list(Filter([start_tag]))[0]['data']
+
+    # The attribute without a namespace is expected first.
+    assert sorted_attrs == OrderedDict([
+        (plain_id, 'patt1'),
+        (xlink_href, '#patt2'),
+    ])
+
+
+def test_with_serializer():
+    """Verify filter works in the context of everything else"""
+    parser = html5lib.HTMLParser()
+    # The namespaced attribute is written first in the markup, so the expected
+    # output below only matches if the serializer's alphabetical filter really
+    # reorders the attributes (and copes with a None vs. string namespace).
+    dom = parser.parseFragment(
+        '<svg><pattern xlink:href="#patt2" id="patt1"></svg>'
+    )
+    walker = html5lib.getTreeWalker('etree')
+    ser = HTMLSerializer(
+        alphabetical_attributes=True,
+        quote_attr_values='always'
+    )
+
+    # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
+    # that gets fixed, we can fix this expected result.
+    assert (
+        ser.render(walker(dom)) ==
+        '<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
+    )