Skip to content

Fix alphabeticalattributes filter namespace problem #324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion html5lib/filters/alphabeticalattributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,24 @@
from ordereddict import OrderedDict


def _attr_key(attr):
    """Return an appropriate key for an attribute for sorting.

    Attributes have a namespace that can be either ``None`` or a string. We
    can't compare the two because they're different types, so we convert
    ``None`` to an empty string first.

    :arg attr: an item whose first element is a ``(namespace, name)`` pair,
        e.g. an ``((namespace, name), value)`` entry from ``dict.items()``

    :returns: ``(namespace, name)`` tuple in which a falsy namespace
        (``None``) has been replaced by ``''``

    """
    namespace = attr[0][0]
    name = attr[0][1]
    # ``or ''`` normalizes None so every sort key is (str, str).
    return (namespace or ''), name


class Filter(base.Filter):
    """Filter that re-orders the attributes of start/empty tag tokens."""

    def __iter__(self):
        """Yield each token, with tag attributes sorted by namespace and name.

        For tokens whose ``"type"`` is ``"StartTag"`` or ``"EmptyTag"`` the
        ``"data"`` mapping is replaced by an ``OrderedDict`` whose entries are
        ordered by ``_attr_key``. All other tokens are passed through
        unchanged.
        """
        for token in base.Filter.__iter__(self):
            if token["type"] in ("StartTag", "EmptyTag"):
                attrs = OrderedDict()
                # Sort with _attr_key rather than the raw (namespace, name)
                # key: a namespace may be None or a string, and those two
                # types cannot be ordered against each other.
                for name, value in sorted(token["data"].items(),
                                          key=_attr_key):
                    attrs[name] = value
                token["data"] = attrs
            yield token
81 changes: 81 additions & 0 deletions html5lib/tests/test_alphabeticalattributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import absolute_import, division, unicode_literals

try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict

import pytest

import html5lib
from html5lib.filters.alphabeticalattributes import Filter
from html5lib.serializer import HTMLSerializer


@pytest.mark.parametrize('msg, attrs, expected_attrs', [
    (
        'no attrs',
        {},
        {}
    ),
    (
        'one attr',
        {(None, 'alt'): 'image'},
        OrderedDict([((None, 'alt'), 'image')])
    ),
    (
        'multiple attrs',
        {
            (None, 'src'): 'foo',
            (None, 'alt'): 'image',
            (None, 'style'): 'border: 1px solid black;'
        },
        OrderedDict([
            ((None, 'alt'), 'image'),
            ((None, 'src'), 'foo'),
            ((None, 'style'), 'border: 1px solid black;')
        ])
    ),
])
def test_alphabetizing(msg, attrs, expected_attrs):
    """Verify attributes without a namespace come out in name order.

    ``msg`` only labels the parametrized case; it is not used in the
    assertion.
    """
    tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
    output_tokens = list(Filter(tokens))

    attrs = output_tokens[0]['data']
    assert attrs == expected_attrs


def test_with_different_namespaces():
    """Verify sorting works when ``None`` and string namespaces are mixed."""
    tokens = [{
        'type': 'StartTag',
        'name': 'pattern',
        'data': {
            (None, 'id'): 'patt1',
            ('http://www.w3.org/1999/xlink', 'href'): '#patt2'
        }
    }]
    output_tokens = list(Filter(tokens))

    attrs = output_tokens[0]['data']
    # The None namespace is expected to sort ahead of the xlink namespace.
    assert attrs == OrderedDict([
        ((None, 'id'), 'patt1'),
        (('http://www.w3.org/1999/xlink', 'href'), '#patt2')
    ])


def test_with_serializer():
    """Verify filter works in the context of everything else"""
    parser = html5lib.HTMLParser()
    dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
    walker = html5lib.getTreeWalker('etree')
    ser = HTMLSerializer(
        alphabetical_attributes=True,
        quote_attr_values='always'
    )

    # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
    # that gets fixed, we can fix this expected result.
    assert (
        ser.render(walker(dom)) ==
        '<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
    )