diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..6facf352 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,8 @@ +[run] +branch = True +source = html5lib + +[paths] +source = + html5lib + .tox/*/lib/python*/site-packages/html5lib diff --git a/.prospector.yaml b/.prospector.yaml new file mode 100644 index 00000000..7e8efe1a --- /dev/null +++ b/.prospector.yaml @@ -0,0 +1,21 @@ +strictness: veryhigh +doc-warnings: false +test-warnings: false + +max-line-length: 139 + +requirements: + - requirements.txt + - requirements-test.txt + - requirements-optional.txt + +ignore-paths: + - parse.py + - utils/ + +python-targets: + - 2 + - 3 + +mccabe: + run: false diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..ea74d5db --- /dev/null +++ b/.pylintrc @@ -0,0 +1,10 @@ +[MASTER] +ignore=tests + +[MESSAGES CONTROL] +# messages up to fixme should probably be fixed somehow +disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda + +[FORMAT] +max-line-length=139 +single-line-if-stmt=no diff --git a/.pytest.expect b/.pytest.expect new file mode 100644 index 00000000..0fa326f0 --- /dev/null +++ b/.pytest.expect @@ -0,0 +1,1322 @@ +pytest-expect file v1 +(2, 7, 11, 'final', 0) +b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL 
+b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL +u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::232::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::234::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::235::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::237::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::240::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::241::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::243::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::244::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::246::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::258::dataState': FAIL +u'html5lib/tests/testdata/tokenizer/test3.test::656::dataState': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/adoption01.dat::17::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::18::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::ElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::19::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::1::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::22::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::23::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::26::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::27::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::2::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::30::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::31::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::ElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::34::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::35::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::38::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::39::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::3::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::40::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::41::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::47::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::48::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::0::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::0::ElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/isindex.dat::0::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::0::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::1::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::2::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::2::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::2::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::2::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::2::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::2::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::2::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/isindex.dat::2::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/isindex.dat::3::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::4::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::5::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/namespace-sensitivity.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::10::lxml::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/ruby.dat::12::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::12::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::12::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::12::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::12::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::12::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::12::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::12::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::15::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::17::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::17::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::17::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::17::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/ruby.dat::17::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::17::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::17::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::17::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::1::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::20::lxml::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/ruby.dat::2::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::2::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::2::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::2::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::2::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::2::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::2::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::2::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::3::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::5::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::5::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::5::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::5::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/ruby.dat::5::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::5::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::5::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::5::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/ruby.dat::7::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/scripted/adoption01.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/ark.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/scripted/webkit01.dat::1::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::0::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::100::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::100::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::100::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::100::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::100::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::100::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::100::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::100::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::101::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::102::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::102::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::102::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::102::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::102::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::102::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::102::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::102::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::103::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::104::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::105::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::105::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::105::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::105::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::105::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::105::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::105::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::105::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::106::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::107::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::107::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::107::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::107::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::107::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::107::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::107::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::107::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::10::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::11::lxml::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::12::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::12::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::12::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::12::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::12::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::12::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::12::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::12::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::13::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::14::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::14::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::14::ElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::14::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::14::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::14::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::14::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::14::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::15::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::16::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::16::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::16::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::16::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::16::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::16::cElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::16::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::16::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::17::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::18::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::19::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::19::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::19::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::19::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::19::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::19::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::19::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::19::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::1::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::20::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::20::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::20::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::20::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::20::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::20::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::20::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::20::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::21::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::22::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::DOM::parser::namespaced': 
FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::23::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::24::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::25::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::25::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::25::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::25::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::25::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::25::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::25::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::25::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::26::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::27::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::27::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::27::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::27::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::27::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::27::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::27::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::27::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::28::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::29::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::2::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::2::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::2::ElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::2::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::2::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::2::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::2::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::2::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::30::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::31::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::31::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::31::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::31::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::31::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::31::cElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::31::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::31::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::32::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::33::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::34::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::34::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::34::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::34::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::34::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::34::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::34::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::34::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::35::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::36::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::36::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::36::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::36::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::36::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::36::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::36::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::36::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::37::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::38::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::DOM::parser::namespaced': 
FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::3::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::40::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::41::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::41::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::41::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::41::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::41::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::41::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::41::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::41::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::42::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::43::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::43::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::43::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::43::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::43::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::43::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::43::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::43::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::44::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::45::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::ElementTree::parser::namespaced': 
FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::46::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::47::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::48::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::48::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::48::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::48::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::48::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::48::cElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::48::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::48::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::49::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::4::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::50::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::50::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::50::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::50::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::50::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::50::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::50::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::50::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::51::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::52::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::52::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::52::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::52::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::52::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::52::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::52::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::52::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::53::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::54::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::DOM::parser::namespaced': 
FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::55::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::56::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::57::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::57::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::57::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::57::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::57::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::57::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::57::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::57::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::58::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::59::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::59::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::59::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::59::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::59::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::59::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::59::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::59::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::5::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::60::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::61::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::61::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::61::ElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::61::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::61::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::61::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::61::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::61::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::62::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::63::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::63::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::63::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::63::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::63::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::63::cElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::63::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::63::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::64::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::65::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::66::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::66::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::66::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::66::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::66::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::66::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::66::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::66::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::67::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::68::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::68::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::68::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::68::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::68::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::68::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::68::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::68::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::69::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::6::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::70::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::70::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::70::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::70::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::70::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::70::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::70::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::70::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::71::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::72::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::72::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::72::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::72::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::72::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::72::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::72::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::72::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::73::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::74::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::74::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::74::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::74::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::74::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::74::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::74::lxml::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::74::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::75::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::76::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::ElementTree::parser::namespaced': 
FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::77::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::78::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::79::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::79::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::79::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::79::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::79::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::79::cElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::79::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::79::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::80::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::81::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::82::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::82::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::82::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::82::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::82::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::82::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::82::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::82::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::83::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::84::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::84::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::84::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::84::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::84::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::84::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::84::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::84::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::85::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::86::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::DOM::parser::namespaced': 
FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::87::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::88::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::89::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::89::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::89::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::89::ElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::89::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::89::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::89::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::89::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::8::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::90::lxml::parser::void-namespace': 
FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::91::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::92::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::93::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::93::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::93::ElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::93::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::93::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::93::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::93::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::93::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::94::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::95::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::95::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::95::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::95::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::95::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::95::cElementTree::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::95::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::95::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::96::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::97::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::98::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::98::DOM::parser::void-namespace': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::98::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::98::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::98::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::98::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::98::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::98::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::99::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::9::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::9::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::9::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::9::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::9::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/template.dat::9::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::9::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/template.dat::9::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::2::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::4::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::5::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/tests11.dat::5::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::5::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::5::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::5::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::5::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::5::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::5::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests11.dat::6::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::14::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::14::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::14::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::14::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::14::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/tests19.dat::14::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::14::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::14::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::17::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests19.dat::7::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::6::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/tests2.dat::6::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::6::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::6::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::6::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::6::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::6::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::6::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests2.dat::7::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests25.dat::7::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests25.dat::7::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests25.dat::7::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests25.dat::7::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests25.dat::7::cElementTree::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/tests25.dat::7::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/tests25.dat::7::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/tests25.dat::7::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::14::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::DOM::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::15::lxml::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::DOM::parser::namespaced': FAIL 
+u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::DOM::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::ElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::ElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::cElementTree::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::cElementTree::parser::void-namespace': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::lxml::parser::namespaced': FAIL +u'html5lib/tests/testdata/tree-construction/webkit02.dat::16::lxml::parser::void-namespace': FAIL diff --git a/.travis.yml b/.travis.yml index 790b3089..94bb87e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,10 +2,11 @@ language: python python: - "2.6" - "2.7" - - "3.2" - "3.3" - "3.4" + - "3.5" - "pypy" + sudo: false cache: @@ -16,18 +17,6 @@ env: - USE_OPTIONAL=true - USE_OPTIONAL=false -matrix: - exclude: - - python: "2.7" - env: USE_OPTIONAL=false - - python: "3.4" - env: USE_OPTIONAL=false - include: - - python: "2.7" - env: USE_OPTIONAL=false FLAKE=true - - python: "3.4" - env: USE_OPTIONAL=false FLAKE=true - before_install: - git submodule update --init --recursive @@ -35,8 +24,12 @@ install: - bash requirements-install.sh script: - - nosetests + - if [[ $TRAVIS_PYTHON_VERSION == pypy* ]]; then py.test; fi + - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then coverage run -m pytest; fi - bash flake8-run.sh after_script: - python debug-info.py + +after_success: + - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then coverage combine && codecov; fi diff --git a/AUTHORS.rst b/AUTHORS.rst index fe9ae89b..c3820ef7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -41,3 +41,4 @@ Patches and suggestions - Jim Baker - Michael[tm] Smith - Marc Abramowitz +- Jon Dufresne diff --git a/CHANGES.rst b/CHANGES.rst index d7797bf9..570c9605 100644 --- a/CHANGES.rst +++ b/CHANGES.rst 
@@ -1,6 +1,79 @@ Change Log ---------- +0.999999999/1.0b10 +~~~~~~~~~~~~~~~~~~ + +Released on July 15, 2016 + +* Fix attribute order going to the tree builder to be document order + instead of reverse document order(!). + + +0.99999999/1.0b9 +~~~~~~~~~~~~~~~~ + +Released on July 14, 2016 + +* **Added ordereddict as a mandatory dependency on Python 2.6.** + +* Added ``lxml``, ``genshi``, ``datrie``, ``charade``, and ``all`` + extras that will do the right thing based on the specific + interpreter implementation. + +* Now requires the ``mock`` package for the testsuite. + +* Cease supporting DATrie under PyPy. + +* **Remove ``PullDOM`` support, as this hasn't ever been properly + tested, doesn't entirely work, and as far as I can tell is + completely unused by anyone.** + +* Move testsuite to ``py.test``. + +* **Fix #124: move to webencodings for decoding the input byte stream; + this makes html5lib compliant with the Encoding Standard, and + introduces a required dependency on webencodings.** + +* **Cease supporting Python 3.2 (in both CPython and PyPy forms).** + +* **Fix comments containing double-dash with lxml 3.5 and above.** + +* **Use scripting disabled by default (as we don't implement + scripting).** + +* **Fix #11, avoiding the XSS bug potentially caused by serializer + allowing attribute values to be escaped out of in old browser versions, + changing the quote_attr_values option on serializer to take one of + three values, "always" (the old True value), "legacy" (the new option, + and the new default), and "spec" (the old False value, and the old + default).** + +* **Fix #72 by rewriting the sanitizer to apply only to treewalkers + (instead of the tokenizer); as such, this will require amending all + callers of it to use it via the treewalker API.** + +* **Drop support of charade, now that chardet is supported once more.** + +* **Replace the charset keyword argument on parse and related methods + with a set of keyword arguments: override_encoding, 
transport_encoding, + same_origin_parent_encoding, likely_encoding, and default_encoding.** + +* **Move filters._base, treebuilders._base, and treewalkers._base to .base + to clarify their status as public.** + +* **Get rid of the sanitizer package. Merge sanitizer.sanitize into the + sanitizer.htmlsanitizer module and move that to sanitizer. This means + anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no + code changes.** + +* **Rename treewalkers.lxmletree to .etree_lxml and + treewalkers.genshistream to .genshi to have a consistent API.** + +* Move a whole load of stuff (inputstream, ihatexml, trie, tokenizer, + utils) to be underscore prefixed to clarify their status as private. + + 0.9999999/1.0b8 ~~~~~~~~~~~~~~~ @@ -173,7 +246,7 @@ Released on May 17, 2013 * Test harness has been improved and now depends on ``nose``. -* Documentation updated and moved to http://html5lib.readthedocs.org/. +* Documentation updated and moved to https://html5lib.readthedocs.io/. 0.95 diff --git a/MANIFEST.in b/MANIFEST.in index 1edd0b7d..4b3ffe3e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,10 @@ include LICENSE +include AUTHORS.rst include CHANGES.rst include README.rst include requirements*.txt +include .pytest.expect +include tox.ini +include pytest.ini graft html5lib/tests/testdata recursive-include html5lib/tests *.py diff --git a/README.rst b/README.rst index 9e0a0f74..2ad46090 100644 --- a/README.rst +++ b/README.rst @@ -51,7 +51,7 @@ pass into html5lib as follows: import html5lib with closing(urlopen("http://example.com/")) as f: - document = html5lib.parse(f, encoding=f.info().getparam("charset")) + document = html5lib.parse(f, transport_encoding=f.info().getparam("charset")) When using with ``urllib.request`` (Python 3), the charset from HTTP should be pass into html5lib as follows: @@ -62,7 +62,7 @@ should be pass into html5lib as follows: import html5lib with urlopen("http://example.com/") as f: - document = html5lib.parse(f, 
encoding=f.info().get_content_charset()) + document = html5lib.parse(f, transport_encoding=f.info().get_content_charset()) To have more control over the parser, create a parser object explicitly. For instance, to make the parser raise exceptions on parse errors, use: @@ -84,13 +84,13 @@ format: parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom")) minidom_document = parser.parse("

Hello World!") -More documentation is available at http://html5lib.readthedocs.org/. +More documentation is available at https://html5lib.readthedocs.io/. Installation ------------ -html5lib works on CPython 2.6+, CPython 3.2+ and PyPy. To install it, +html5lib works on CPython 2.6+, CPython 3.3+ and PyPy. To install it, use: .. code-block:: bash @@ -104,8 +104,8 @@ Optional Dependencies The following third-party libraries may be used for additional functionality: -- ``datrie`` can be used to improve parsing performance (though in - almost all cases the improvement is marginal); +- ``datrie`` can be used under CPython to improve parsing performance + (though in almost all cases the improvement is marginal); - ``lxml`` is supported as a tree format (for both building and walking) under CPython (but *not* PyPy where it is known to cause @@ -113,13 +113,8 @@ functionality: - ``genshi`` has a treewalker (but not builder); and -- ``charade`` can be used as a fallback when character encoding cannot - be determined; ``chardet``, from which it was forked, can also be used - on Python 2. - -- ``ordereddict`` can be used under Python 2.6 - (``collections.OrderedDict`` is used instead on later versions) to - serialize attributes in alphabetical order. +- ``chardet`` can be used as a fallback when character encoding cannot + be determined. Bugs @@ -132,9 +127,9 @@ Please report any bugs on the `issue tracker Tests ----- -Unit tests require the ``nose`` library and can be run using the -``nosetests`` command in the root directory; ``ordereddict`` is -required under Python 2.6. All should pass. +Unit tests require the ``pytest`` and ``mock`` libraries and can be +run using the ``py.test`` command in the root directory; +``ordereddict`` is required under Python 2.6. All should pass. 
Test data are contained in a separate `html5lib-tests `_ repository and included diff --git a/debug-info.py b/debug-info.py index b5d2bb6a..f93fbdbe 100644 --- a/debug-info.py +++ b/debug-info.py @@ -12,7 +12,7 @@ "maxsize": sys.maxsize } -search_modules = ["charade", "chardet", "datrie", "genshi", "html5lib", "lxml", "six"] +search_modules = ["chardet", "datrie", "genshi", "html5lib", "lxml", "six"] found_modules = [] for m in search_modules: diff --git a/doc/conf.py b/doc/conf.py index 434f21c4..e02218b8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -126,7 +126,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +#html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. diff --git a/doc/html5lib.filters.rst b/doc/html5lib.filters.rst index 1fda38a7..38d4a956 100644 --- a/doc/html5lib.filters.rst +++ b/doc/html5lib.filters.rst @@ -1,10 +1,10 @@ filters Package =============== -:mod:`_base` Module +:mod:`base` Module ------------------- -.. automodule:: html5lib.filters._base +.. automodule:: html5lib.filters.base :members: :undoc-members: :show-inheritance: diff --git a/doc/html5lib.rst b/doc/html5lib.rst index d4ed12b4..f0646aac 100644 --- a/doc/html5lib.rst +++ b/doc/html5lib.rst @@ -25,42 +25,10 @@ html5lib Package :undoc-members: :show-inheritance: -:mod:`ihatexml` Module +:mod:`serializer` Module ---------------------- -.. automodule:: html5lib.ihatexml - :members: - :undoc-members: - :show-inheritance: - -:mod:`inputstream` Module -------------------------- - -.. automodule:: html5lib.inputstream - :members: - :undoc-members: - :show-inheritance: - -:mod:`sanitizer` Module ------------------------ - -.. 
automodule:: html5lib.sanitizer - :members: - :undoc-members: - :show-inheritance: - -:mod:`tokenizer` Module ------------------------ - -.. automodule:: html5lib.tokenizer - :members: - :undoc-members: - :show-inheritance: - -:mod:`utils` Module -------------------- - -.. automodule:: html5lib.utils +.. automodule:: html5lib.serializer :members: :undoc-members: :show-inheritance: @@ -71,7 +39,6 @@ Subpackages .. toctree:: html5lib.filters - html5lib.serializer html5lib.treebuilders html5lib.treewalkers diff --git a/doc/html5lib.serializer.rst b/doc/html5lib.serializer.rst deleted file mode 100644 index fa954742..00000000 --- a/doc/html5lib.serializer.rst +++ /dev/null @@ -1,19 +0,0 @@ -serializer Package -================== - -:mod:`serializer` Package -------------------------- - -.. automodule:: html5lib.serializer - :members: - :undoc-members: - :show-inheritance: - -:mod:`htmlserializer` Module ----------------------------- - -.. automodule:: html5lib.serializer.htmlserializer - :members: - :undoc-members: - :show-inheritance: - diff --git a/doc/html5lib.treebuilders.rst b/doc/html5lib.treebuilders.rst index 99119839..aee82142 100644 --- a/doc/html5lib.treebuilders.rst +++ b/doc/html5lib.treebuilders.rst @@ -9,10 +9,10 @@ treebuilders Package :undoc-members: :show-inheritance: -:mod:`_base` Module +:mod:`base` Module ------------------- -.. automodule:: html5lib.treebuilders._base +.. automodule:: html5lib.treebuilders.base :members: :undoc-members: :show-inheritance: diff --git a/doc/html5lib.treewalkers.rst b/doc/html5lib.treewalkers.rst index 80595e2d..46501258 100644 --- a/doc/html5lib.treewalkers.rst +++ b/doc/html5lib.treewalkers.rst @@ -9,10 +9,10 @@ treewalkers Package :undoc-members: :show-inheritance: -:mod:`_base` Module +:mod:`base` Module ------------------- -.. automodule:: html5lib.treewalkers._base +.. 
automodule:: html5lib.treewalkers.base :members: :undoc-members: :show-inheritance: @@ -33,27 +33,19 @@ treewalkers Package :undoc-members: :show-inheritance: -:mod:`genshistream` Module --------------------------- - -.. automodule:: html5lib.treewalkers.genshistream - :members: - :undoc-members: - :show-inheritance: - -:mod:`lxmletree` Module +:mod:`etree_lxml` Module ----------------------- -.. automodule:: html5lib.treewalkers.lxmletree +.. automodule:: html5lib.treewalkers.etree_lxml :members: :undoc-members: :show-inheritance: -:mod:`pulldom` Module ---------------------- -.. automodule:: html5lib.treewalkers.pulldom +:mod:`genshi` Module +-------------------------- + +.. automodule:: html5lib.treewalkers.genshi :members: :undoc-members: - :show-inheritance: - + :show-inheritance: \ No newline at end of file diff --git a/doc/index.rst b/doc/index.rst index ca2e1b96..27104b14 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -8,6 +8,7 @@ Overview :maxdepth: 2 movingparts + modules changes License diff --git a/flake8-run.sh b/flake8-run.sh index d1a587d3..d9264946 100755 --- a/flake8-run.sh +++ b/flake8-run.sh @@ -5,10 +5,5 @@ if [[ ! -x $(which flake8) ]]; then exit 1 fi -if [[ $TRAVIS != "true" || $FLAKE == "true" ]]; then - find html5lib/ -name '*.py' -and -not -name 'constants.py' -print0 | xargs -0 flake8 --ignore=E501 - flake1=$? - flake8 --max-line-length=99 --ignore=E126 html5lib/constants.py - flake2=$? - exit $[$flake1 || $flake2] -fi +flake8 `dirname $0` +exit $? 
diff --git a/html5lib/__init__.py b/html5lib/__init__.py index 962536cb..8ee9b53e 100644 --- a/html5lib/__init__.py +++ b/html5lib/__init__.py @@ -22,4 +22,4 @@ "getTreeWalker", "serialize"] # this has to be at the top level, see how setup.py parses this -__version__ = "0.9999999" +__version__ = "0.999999999" diff --git a/html5lib/ihatexml.py b/html5lib/_ihatexml.py similarity index 97% rename from html5lib/ihatexml.py rename to html5lib/_ihatexml.py index 0fc79308..d6d1d6fb 100644 --- a/html5lib/ihatexml.py +++ b/html5lib/_ihatexml.py @@ -175,9 +175,9 @@ def escapeRegexp(string): return string # output from the above -nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u
0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') +nonXmlNameBMPRegexp = 
re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48
\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa -nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u
0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') +nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0
c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa # Simpler things nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]") @@ -186,7 +186,7 @@ def escapeRegexp(string): class InfosetFilter(object): replacementRegexp = re.compile(r"U[\dA-F]{5,5}") - def __init__(self, replaceChars=None, + def __init__(self, dropXmlnsLocalName=False, dropXmlnsAttrNs=False, preventDoubleDashComments=False, @@ -217,7 +217,7 @@ def coerceAttribute(self, name, namespace=None): else: return self.toXmlName(name) - def coerceElement(self, name, namespace=None): + def coerceElement(self, name): return self.toXmlName(name) def coerceComment(self, data): @@ -225,11 +225,14 @@ def coerceComment(self, data): while "--" in data: warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) data = data.replace("--", "- -") + if data.endswith("-"): + warnings.warn("Comments cannot end in a dash", DataLossWarning) + data += " " return data def coerceCharacters(self, data): if self.replaceFormFeedCharacters: - for i in 
range(data.count("\x0C")): + for _ in range(data.count("\x0C")): warnings.warn("Text cannot contain U+000C", DataLossWarning) data = data.replace("\x0C", " ") # Other non-xml characters diff --git a/html5lib/inputstream.py b/html5lib/_inputstream.py similarity index 83% rename from html5lib/inputstream.py rename to html5lib/_inputstream.py index 7020aa60..79f2331e 100644 --- a/html5lib/inputstream.py +++ b/html5lib/_inputstream.py @@ -1,13 +1,16 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type -from six.moves import http_client + +from six import text_type, binary_type +from six.moves import http_client, urllib import codecs import re +import webencodings + from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase -from .constants import encodings, ReparseException -from . import utils +from .constants import ReparseException +from . import _utils from io import StringIO @@ -16,12 +19,6 @@ except ImportError: BytesIO = StringIO -try: - from io import BufferedIOBase -except ImportError: - class BufferedIOBase(object): - pass - # Non-unicode versions of constants for use in the pre-parser spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) @@ -29,15 +26,17 @@ class BufferedIOBase(object): spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) -invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" +invalid_unicode_no_surrogate = 
"[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa -if utils.supports_lone_surrogates: +if _utils.supports_lone_surrogates: # Use one extra step of indirection and create surrogates with - # unichr. Not using this indirection would introduce an illegal + # eval. Not using this indirection would introduce an illegal # unicode literal on platforms not supporting such lone # surrogates. - invalid_unicode_re = re.compile(invalid_unicode_no_surrogate + - eval('"\\uD800-\\uDFFF"')) + assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + + eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used + "]") else: invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) @@ -129,10 +128,13 @@ def _readFromBuffer(self, bytes): return b"".join(rv) -def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): - if isinstance(source, http_client.HTTPResponse): - # Work around Python bug #20007: read(0) closes the connection. - # http://bugs.python.org/issue20007 +def HTMLInputStream(source, **kwargs): + # Work around Python bug #20007: read(0) closes the connection. 
+ # http://bugs.python.org/issue20007 + if (isinstance(source, http_client.HTTPResponse) or + # Also check for addinfourl wrapping HTTPResponse + (isinstance(source, urllib.response.addbase) and + isinstance(source.fp, http_client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) @@ -140,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): isUnicode = isinstance(source, text_type) if isUnicode: - if encoding is not None: - raise TypeError("Cannot explicitly set an encoding with a unicode string") + encodings = [x for x in kwargs if x.endswith("_encoding")] + if encodings: + raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) - return HTMLUnicodeInputStream(source) + return HTMLUnicodeInputStream(source, **kwargs) else: - return HTMLBinaryInputStream(source, encoding, parseMeta, chardet) + return HTMLBinaryInputStream(source, **kwargs) class HTMLUnicodeInputStream(object): @@ -171,27 +174,21 @@ def __init__(self, source): regardless of any BOM or later declaration (such as in a meta element) - parseMeta - Look for a element containing encoding information - """ - if not utils.supports_lone_surrogates: + if not _utils.supports_lone_surrogates: # Such platforms will have already checked for such # surrogate errors, so no need to do this checking. self.reportCharacterErrors = None - self.replaceCharactersRegexp = None elif len("\U0010FFFF") == 1: self.reportCharacterErrors = self.characterErrorsUCS4 - self.replaceCharactersRegexp = re.compile(eval('"[\\uD800-\\uDFFF]"')) else: self.reportCharacterErrors = self.characterErrorsUCS2 - self.replaceCharactersRegexp = re.compile( - eval('"([\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(? 
Normalized stream from source @@ -408,8 +404,6 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True): regardless of any BOM or later declaration (such as in a meta element) - parseMeta - Look for a element containing encoding information - """ # Raw Stream - for unicode objects this will encode to utf-8 and set # self.charEncoding as appropriate @@ -417,27 +411,28 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True): HTMLUnicodeInputStream.__init__(self, self.rawStream) - self.charEncoding = (codecName(encoding), "certain") - # Encoding Information # Number of bytes to use when looking for a meta element with # encoding information - self.numBytesMeta = 512 + self.numBytesMeta = 1024 # Number of bytes to use when using detecting encoding using chardet self.numBytesChardet = 100 - # Encoding to use if no other information can be found - self.defaultEncoding = "windows-1252" + # Things from args + self.override_encoding = override_encoding + self.transport_encoding = transport_encoding + self.same_origin_parent_encoding = same_origin_parent_encoding + self.likely_encoding = likely_encoding + self.default_encoding = default_encoding - # Detect encoding iff no explicit "transport level" encoding is supplied - if (self.charEncoding[0] is None): - self.charEncoding = self.detectEncoding(parseMeta, chardet) + # Determine encoding + self.charEncoding = self.determineEncoding(useChardet) + assert self.charEncoding[0] is not None # Call superclass self.reset() def reset(self): - self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream, - 'replace') + self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') HTMLUnicodeInputStream.reset(self) def openStream(self, source): @@ -454,29 +449,50 @@ def openStream(self, source): try: stream.seek(stream.tell()) - except: + except: # pylint:disable=bare-except stream = BufferedStream(stream) return stream - def detectEncoding(self, 
parseMeta=True, chardet=True): - # First look for a BOM + def determineEncoding(self, chardet=True): + # BOMs take precedence over everything # This will also read past the BOM if present - encoding = self.detectBOM() - confidence = "certain" - # If there is no BOM need to look for meta elements with encoding - # information - if encoding is None and parseMeta: - encoding = self.detectEncodingMeta() - confidence = "tentative" - # Guess with chardet, if avaliable - if encoding is None and chardet: - confidence = "tentative" + charEncoding = self.detectBOM(), "certain" + if charEncoding[0] is not None: + return charEncoding + + # If we've been overriden, we've been overriden + charEncoding = lookupEncoding(self.override_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Now check the transport layer + charEncoding = lookupEncoding(self.transport_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Look for meta elements with encoding information + charEncoding = self.detectEncodingMeta(), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Parent document encoding + charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" + if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): + return charEncoding + + # "likely" encoding + charEncoding = lookupEncoding(self.likely_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Guess with chardet, if available + if chardet: try: - try: - from charade.universaldetector import UniversalDetector - except ImportError: - from chardet.universaldetector import UniversalDetector + from chardet.universaldetector import UniversalDetector + except ImportError: + pass + else: buffers = [] detector = UniversalDetector() while not detector.done: @@ -487,36 +503,33 @@ def detectEncoding(self, parseMeta=True, chardet=True): buffers.append(buffer) detector.feed(buffer) 
detector.close() - encoding = detector.result['encoding'] + encoding = lookupEncoding(detector.result['encoding']) self.rawStream.seek(0) - except ImportError: - pass - # If all else fails use the default encoding - if encoding is None: - confidence = "tentative" - encoding = self.defaultEncoding + if encoding is not None: + return encoding, "tentative" - # Substitute for equivalent encodings: - encodingSub = {"iso-8859-1": "windows-1252"} + # Try the default encoding + charEncoding = lookupEncoding(self.default_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding - if encoding.lower() in encodingSub: - encoding = encodingSub[encoding.lower()] - - return encoding, confidence + # Fallback to html5lib's default if even that hasn't worked + return lookupEncoding("windows-1252"), "tentative" def changeEncoding(self, newEncoding): assert self.charEncoding[1] != "certain" - newEncoding = codecName(newEncoding) - if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"): - newEncoding = "utf-8" + newEncoding = lookupEncoding(newEncoding) if newEncoding is None: return + if newEncoding.name in ("utf-16be", "utf-16le"): + newEncoding = lookupEncoding("utf-8") + assert newEncoding is not None elif newEncoding == self.charEncoding[0]: self.charEncoding = (self.charEncoding[0], "certain") else: self.rawStream.seek(0) - self.reset() self.charEncoding = (newEncoding, "certain") + self.reset() raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) def detectBOM(self): @@ -525,8 +538,8 @@ def detectBOM(self): encoding otherwise return None""" bomDict = { codecs.BOM_UTF8: 'utf-8', - codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be', - codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be' + codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', + codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' } # Go to beginning of file and read in 4 bytes @@ -546,9 +559,12 @@ 
def detectBOM(self): # Set the read position past the BOM if one was found, otherwise # set it to the start of the stream - self.rawStream.seek(encoding and seek or 0) - - return encoding + if encoding: + self.rawStream.seek(seek) + return lookupEncoding(encoding) + else: + self.rawStream.seek(0) + return None def detectEncodingMeta(self): """Report the encoding declared by the meta element @@ -559,8 +575,8 @@ def detectEncodingMeta(self): self.rawStream.seek(0) encoding = parser.getEncoding() - if encoding in ("utf-16", "utf-16-be", "utf-16-le"): - encoding = "utf-8" + if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): + encoding = lookupEncoding("utf-8") return encoding @@ -574,6 +590,7 @@ def __new__(self, value): return bytes.__new__(self, value.lower()) def __init__(self, value): + # pylint:disable=unused-argument self._position = -1 def __iter__(self): @@ -684,7 +701,7 @@ def getEncoding(self): (b" 0: - for i in range(nullCount): + for _ in range(nullCount): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) data = data.replace("\u0000", "\uFFFD") diff --git a/html5lib/trie/__init__.py b/html5lib/_trie/__init__.py similarity index 73% rename from html5lib/trie/__init__.py rename to html5lib/_trie/__init__.py index a8cca8a9..a5ba4bf1 100644 --- a/html5lib/trie/__init__.py +++ b/html5lib/_trie/__init__.py @@ -4,9 +4,11 @@ Trie = PyTrie +# pylint:disable=wrong-import-position try: from .datrie import Trie as DATrie except ImportError: pass else: Trie = DATrie +# pylint:enable=wrong-import-position diff --git a/html5lib/trie/_base.py b/html5lib/_trie/_base.py similarity index 91% rename from html5lib/trie/_base.py rename to html5lib/_trie/_base.py index 724486b1..25eece46 100644 --- a/html5lib/trie/_base.py +++ b/html5lib/_trie/_base.py @@ -7,7 +7,8 @@ class Trie(Mapping): """Abstract base class for tries""" def keys(self, prefix=None): - keys = super().keys() + # pylint:disable=arguments-differ + keys = 
super(Trie, self).keys() if prefix is None: return set(keys) diff --git a/html5lib/trie/datrie.py b/html5lib/_trie/datrie.py similarity index 100% rename from html5lib/trie/datrie.py rename to html5lib/_trie/datrie.py diff --git a/html5lib/trie/py.py b/html5lib/_trie/py.py similarity index 100% rename from html5lib/trie/py.py rename to html5lib/_trie/py.py diff --git a/html5lib/utils.py b/html5lib/_utils.py similarity index 71% rename from html5lib/utils.py rename to html5lib/_utils.py index fdc18feb..03f0dab7 100644 --- a/html5lib/utils.py +++ b/html5lib/_utils.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, unicode_literals +import sys from types import ModuleType from six import text_type @@ -12,9 +13,11 @@ __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", "surrogatePairToCodepoint", "moduleFactoryFactory", - "supports_lone_surrogates"] + "supports_lone_surrogates", "PY27"] +PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7 + # Platforms not supporting lone surrogates (\uD800-\uDFFF) should be # caught by the below test. In general this would be any platform # using UTF-16 as its encoding of unicode strings, such as @@ -22,12 +25,12 @@ # surrogates, and there is no mechanism to further escape such # escapes. try: - _x = eval('"\\uD800"') + _x = eval('"\\uD800"') # pylint:disable=eval-used if not isinstance(_x, text_type): # We need this with u"" because of http://bugs.jython.org/issue2039 - _x = eval('u"\\uD800"') + _x = eval('u"\\uD800"') # pylint:disable=eval-used assert isinstance(_x, text_type) -except: +except: # pylint:disable=bare-except supports_lone_surrogates = False else: supports_lone_surrogates = True @@ -52,19 +55,20 @@ def __init__(self, items=()): # anything here. 
_dictEntries = [] for name, value in items: - if type(name) in (list, tuple, frozenset, set): + if isinstance(name, (list, tuple, frozenset, set)): for item in name: _dictEntries.append((item, value)) else: _dictEntries.append((name, value)) dict.__init__(self, _dictEntries) + assert len(self) == len(_dictEntries) self.default = None def __getitem__(self, key): return dict.get(self, key, self.default) -# Some utility functions to dal with weirdness around UCS2 vs UCS4 +# Some utility functions to deal with weirdness around UCS2 vs UCS4 # python builds def isSurrogatePair(data): @@ -91,13 +95,33 @@ def moduleFactory(baseModule, *args, **kwargs): else: name = b"_%s_factory" % baseModule.__name__ - if name in moduleCache: - return moduleCache[name] - else: + kwargs_tuple = tuple(kwargs.items()) + + try: + return moduleCache[name][args][kwargs_tuple] + except KeyError: mod = ModuleType(name) objs = factory(baseModule, *args, **kwargs) mod.__dict__.update(objs) - moduleCache[name] = mod + if "name" not in moduleCache: + moduleCache[name] = {} + if "args" not in moduleCache[name]: + moduleCache[name][args] = {} + if "kwargs" not in moduleCache[name][args]: + moduleCache[name][args][kwargs_tuple] = {} + moduleCache[name][args][kwargs_tuple] = mod return mod return moduleFactory + + +def memoize(func): + cache = {} + + def wrapped(*args, **kwargs): + key = (tuple(args), tuple(kwargs.items())) + if key not in cache: + cache[key] = func(*args, **kwargs) + return cache[key] + + return wrapped diff --git a/html5lib/constants.py b/html5lib/constants.py index d938e0ae..9e7541d3 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -283,6 +283,12 @@ "Element %(name)s not allowed in a non-html context", "unexpected-end-tag-before-html": "Unexpected end tag (%(name)s) before html.", + "unexpected-inhead-noscript-tag": + "Element %(name)s not allowed in a inhead-noscript context", + "eof-in-head-noscript": + "Unexpected end of file. 
Expected inhead-noscript content", + "char-in-head-noscript": + "Unexpected non-space character. Expected inhead-noscript content", "XXX-undefined-error": "Undefined error (this sucks and should be fixed)", } @@ -431,6 +437,73 @@ (namespaces["mathml"], "mtext") ]) +adjustSVGAttributes = { + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", 
+ "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan" +} + +adjustMathMLAttributes = {"definitionurl": "definitionURL"} + adjustForeignAttributes = { "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), @@ -2813,7 +2886,6 @@ 0x0d: "\u000D", 0x80: "\u20AC", 0x81: "\u0081", - 0x81: "\u0081", 0x82: "\u201A", 0x83: "\u0192", 0x84: "\u201E", @@ -2846,235 +2918,6 @@ 0x9F: "\u0178", } -encodings = { - '437': 'cp437', - '850': 'cp850', - '852': 'cp852', - '855': 'cp855', - '857': 'cp857', - '860': 'cp860', - '861': 'cp861', - '862': 'cp862', - '863': 'cp863', - '865': 'cp865', - '866': 'cp866', - '869': 'cp869', - 'ansix341968': 'ascii', - 'ansix341986': 'ascii', - 'arabic': 'iso8859-6', - 'ascii': 'ascii', - 'asmo708': 'iso8859-6', - 'big5': 'big5', - 'big5hkscs': 'big5hkscs', - 'chinese': 'gbk', - 'cp037': 'cp037', - 'cp1026': 'cp1026', - 'cp154': 'ptcp154', - 'cp367': 'ascii', - 'cp424': 'cp424', - 'cp437': 'cp437', - 'cp500': 'cp500', - 'cp775': 'cp775', - 'cp819': 'windows-1252', - 'cp850': 'cp850', - 'cp852': 'cp852', - 'cp855': 'cp855', - 'cp857': 'cp857', - 'cp860': 'cp860', - 'cp861': 'cp861', - 'cp862': 'cp862', - 'cp863': 'cp863', - 'cp864': 'cp864', - 'cp865': 'cp865', - 'cp866': 'cp866', - 'cp869': 'cp869', - 'cp936': 'gbk', - 'cpgr': 'cp869', - 'cpis': 'cp861', - 'csascii': 'ascii', - 'csbig5': 'big5', - 'cseuckr': 'cp949', - 'cseucpkdfmtjapanese': 'euc_jp', - 'csgb2312': 'gbk', - 'cshproman8': 'hp-roman8', - 'csibm037': 'cp037', - 'csibm1026': 'cp1026', - 'csibm424': 'cp424', - 'csibm500': 'cp500', - 'csibm855': 'cp855', - 'csibm857': 
'cp857', - 'csibm860': 'cp860', - 'csibm861': 'cp861', - 'csibm863': 'cp863', - 'csibm864': 'cp864', - 'csibm865': 'cp865', - 'csibm866': 'cp866', - 'csibm869': 'cp869', - 'csiso2022jp': 'iso2022_jp', - 'csiso2022jp2': 'iso2022_jp_2', - 'csiso2022kr': 'iso2022_kr', - 'csiso58gb231280': 'gbk', - 'csisolatin1': 'windows-1252', - 'csisolatin2': 'iso8859-2', - 'csisolatin3': 'iso8859-3', - 'csisolatin4': 'iso8859-4', - 'csisolatin5': 'windows-1254', - 'csisolatin6': 'iso8859-10', - 'csisolatinarabic': 'iso8859-6', - 'csisolatincyrillic': 'iso8859-5', - 'csisolatingreek': 'iso8859-7', - 'csisolatinhebrew': 'iso8859-8', - 'cskoi8r': 'koi8-r', - 'csksc56011987': 'cp949', - 'cspc775baltic': 'cp775', - 'cspc850multilingual': 'cp850', - 'cspc862latinhebrew': 'cp862', - 'cspc8codepage437': 'cp437', - 'cspcp852': 'cp852', - 'csptcp154': 'ptcp154', - 'csshiftjis': 'shift_jis', - 'csunicode11utf7': 'utf-7', - 'cyrillic': 'iso8859-5', - 'cyrillicasian': 'ptcp154', - 'ebcdiccpbe': 'cp500', - 'ebcdiccpca': 'cp037', - 'ebcdiccpch': 'cp500', - 'ebcdiccphe': 'cp424', - 'ebcdiccpnl': 'cp037', - 'ebcdiccpus': 'cp037', - 'ebcdiccpwt': 'cp037', - 'ecma114': 'iso8859-6', - 'ecma118': 'iso8859-7', - 'elot928': 'iso8859-7', - 'eucjp': 'euc_jp', - 'euckr': 'cp949', - 'extendedunixcodepackedformatforjapanese': 'euc_jp', - 'gb18030': 'gb18030', - 'gb2312': 'gbk', - 'gb231280': 'gbk', - 'gbk': 'gbk', - 'greek': 'iso8859-7', - 'greek8': 'iso8859-7', - 'hebrew': 'iso8859-8', - 'hproman8': 'hp-roman8', - 'hzgb2312': 'hz', - 'ibm037': 'cp037', - 'ibm1026': 'cp1026', - 'ibm367': 'ascii', - 'ibm424': 'cp424', - 'ibm437': 'cp437', - 'ibm500': 'cp500', - 'ibm775': 'cp775', - 'ibm819': 'windows-1252', - 'ibm850': 'cp850', - 'ibm852': 'cp852', - 'ibm855': 'cp855', - 'ibm857': 'cp857', - 'ibm860': 'cp860', - 'ibm861': 'cp861', - 'ibm862': 'cp862', - 'ibm863': 'cp863', - 'ibm864': 'cp864', - 'ibm865': 'cp865', - 'ibm866': 'cp866', - 'ibm869': 'cp869', - 'iso2022jp': 'iso2022_jp', - 'iso2022jp2': 
'iso2022_jp_2', - 'iso2022kr': 'iso2022_kr', - 'iso646irv1991': 'ascii', - 'iso646us': 'ascii', - 'iso88591': 'windows-1252', - 'iso885910': 'iso8859-10', - 'iso8859101992': 'iso8859-10', - 'iso885911987': 'windows-1252', - 'iso885913': 'iso8859-13', - 'iso885914': 'iso8859-14', - 'iso8859141998': 'iso8859-14', - 'iso885915': 'iso8859-15', - 'iso885916': 'iso8859-16', - 'iso8859162001': 'iso8859-16', - 'iso88592': 'iso8859-2', - 'iso885921987': 'iso8859-2', - 'iso88593': 'iso8859-3', - 'iso885931988': 'iso8859-3', - 'iso88594': 'iso8859-4', - 'iso885941988': 'iso8859-4', - 'iso88595': 'iso8859-5', - 'iso885951988': 'iso8859-5', - 'iso88596': 'iso8859-6', - 'iso885961987': 'iso8859-6', - 'iso88597': 'iso8859-7', - 'iso885971987': 'iso8859-7', - 'iso88598': 'iso8859-8', - 'iso885981988': 'iso8859-8', - 'iso88599': 'windows-1254', - 'iso885991989': 'windows-1254', - 'isoceltic': 'iso8859-14', - 'isoir100': 'windows-1252', - 'isoir101': 'iso8859-2', - 'isoir109': 'iso8859-3', - 'isoir110': 'iso8859-4', - 'isoir126': 'iso8859-7', - 'isoir127': 'iso8859-6', - 'isoir138': 'iso8859-8', - 'isoir144': 'iso8859-5', - 'isoir148': 'windows-1254', - 'isoir149': 'cp949', - 'isoir157': 'iso8859-10', - 'isoir199': 'iso8859-14', - 'isoir226': 'iso8859-16', - 'isoir58': 'gbk', - 'isoir6': 'ascii', - 'koi8r': 'koi8-r', - 'koi8u': 'koi8-u', - 'korean': 'cp949', - 'ksc5601': 'cp949', - 'ksc56011987': 'cp949', - 'ksc56011989': 'cp949', - 'l1': 'windows-1252', - 'l10': 'iso8859-16', - 'l2': 'iso8859-2', - 'l3': 'iso8859-3', - 'l4': 'iso8859-4', - 'l5': 'windows-1254', - 'l6': 'iso8859-10', - 'l8': 'iso8859-14', - 'latin1': 'windows-1252', - 'latin10': 'iso8859-16', - 'latin2': 'iso8859-2', - 'latin3': 'iso8859-3', - 'latin4': 'iso8859-4', - 'latin5': 'windows-1254', - 'latin6': 'iso8859-10', - 'latin8': 'iso8859-14', - 'latin9': 'iso8859-15', - 'ms936': 'gbk', - 'mskanji': 'shift_jis', - 'pt154': 'ptcp154', - 'ptcp154': 'ptcp154', - 'r8': 'hp-roman8', - 'roman8': 'hp-roman8', - 
'shiftjis': 'shift_jis', - 'tis620': 'cp874', - 'unicode11utf7': 'utf-7', - 'us': 'ascii', - 'usascii': 'ascii', - 'utf16': 'utf-16', - 'utf16be': 'utf-16-be', - 'utf16le': 'utf-16-le', - 'utf8': 'utf-8', - 'windows1250': 'cp1250', - 'windows1251': 'cp1251', - 'windows1252': 'cp1252', - 'windows1253': 'cp1253', - 'windows1254': 'cp1254', - 'windows1255': 'cp1255', - 'windows1256': 'cp1256', - 'windows1257': 'cp1257', - 'windows1258': 'cp1258', - 'windows936': 'gbk', - 'x-x-big5': 'big5'} - tokenTypes = { "Doctype": 0, "Characters": 1, diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index fed6996c..4795baec 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -1,6 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base +from . import base try: from collections import OrderedDict @@ -8,9 +8,9 @@ from ordereddict import OrderedDict -class Filter(_base.Filter): +class Filter(base.Filter): def __iter__(self): - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): if token["type"] in ("StartTag", "EmptyTag"): attrs = OrderedDict() for name, value in sorted(token["data"].items(), diff --git a/html5lib/filters/_base.py b/html5lib/filters/base.py similarity index 100% rename from html5lib/filters/_base.py rename to html5lib/filters/base.py diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py index ca33b70b..2059ec86 100644 --- a/html5lib/filters/inject_meta_charset.py +++ b/html5lib/filters/inject_meta_charset.py @@ -1,11 +1,11 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base +from . 
import base -class Filter(_base.Filter): +class Filter(base.Filter): def __init__(self, source, encoding): - _base.Filter.__init__(self, source) + base.Filter.__init__(self, source) self.encoding = encoding def __iter__(self): @@ -13,7 +13,7 @@ def __iter__(self): meta_found = (self.encoding is None) pending = [] - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag": if token["name"].lower() == "head": diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index 8884696d..a9c0831a 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,90 +1,81 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base -from ..constants import cdataElements, rcdataElements, voidElements +from six import text_type + +from . import base +from ..constants import namespaces, voidElements from ..constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) -class LintError(Exception): - pass - +class Filter(base.Filter): + def __init__(self, source, require_matching_tags=True): + super(Filter, self).__init__(source) + self.require_matching_tags = require_matching_tags -class Filter(_base.Filter): def __iter__(self): open_elements = [] - contentModelFlag = "PCDATA" - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): type = token["type"] if type in ("StartTag", "EmptyTag"): + namespace = token["namespace"] name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name}) - if not isinstance(name, str): - raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) - if not name: - raise LintError("Empty tag name") - if type == "StartTag" and name in voidElements: - raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name}) - elif type == "EmptyTag" and name not in voidElements: - raise 
LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]}) - if type == "StartTag": - open_elements.append(name) - for name, value in token["data"]: - if not isinstance(name, str): - raise LintError("Attribute name is not a string: %(name)r" % {"name": name}) - if not name: - raise LintError("Empty attribute name") - if not isinstance(value, str): - raise LintError("Attribute value is not a string: %(value)r" % {"value": value}) - if name in cdataElements: - contentModelFlag = "CDATA" - elif name in rcdataElements: - contentModelFlag = "RCDATA" - elif name == "plaintext": - contentModelFlag = "PLAINTEXT" + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(token["data"], dict) + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert type == "EmptyTag" + else: + assert type == "StartTag" + if type == "StartTag" and self.require_matching_tags: + open_elements.append((namespace, name)) + for (namespace, name), value in token["data"].items(): + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(value, text_type) elif type == "EndTag": + namespace = token["namespace"] name = token["name"] - if not isinstance(name, str): - raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) - if not name: - raise LintError("Empty tag name") - if name in voidElements: - raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name}) - start_name = open_elements.pop() - if start_name != name: - raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name}) - contentModelFlag = "PCDATA" + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != 
"" + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} + elif self.require_matching_tags: + start = open_elements.pop() + assert start == (namespace, name) elif type == "Comment": - if contentModelFlag != "PCDATA": - raise LintError("Comment not in PCDATA content model flag") + data = token["data"] + assert isinstance(data, text_type) elif type in ("Characters", "SpaceCharacters"): data = token["data"] - if not isinstance(data, str): - raise LintError("Attribute name is not a string: %(name)r" % {"name": data}) - if not data: - raise LintError("%(type)s token with empty data" % {"type": type}) + assert isinstance(data, text_type) + assert data != "" if type == "SpaceCharacters": - data = data.strip(spaceCharacters) - if data: - raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data}) + assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name}) - if not isinstance(name, str): - raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) - # XXX: what to do with token["data"] ? 
+ assert name is None or isinstance(name, text_type) + assert token["publicId"] is None or isinstance(name, text_type) + assert token["systemId"] is None or isinstance(name, text_type) + + elif type == "Entity": + assert isinstance(token["name"], text_type) - elif type in ("ParseError", "SerializeError"): - pass + elif type == "SerializerError": + assert isinstance(token["data"], text_type) else: - raise LintError("Unknown token type: %(type)s" % {"type": type}) + assert False, "Unknown token type: %(type)s" % {"type": type} yield token diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py index fefe0b30..f6edb734 100644 --- a/html5lib/filters/optionaltags.py +++ b/html5lib/filters/optionaltags.py @@ -1,9 +1,9 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base +from . import base -class Filter(_base.Filter): +class Filter(base.Filter): def slider(self): previous1 = previous2 = None for token in self.source: @@ -11,7 +11,8 @@ def slider(self): yield previous2, previous1, token previous2 = previous1 previous1 = token - yield previous2, previous1, None + if previous1 is not None: + yield previous2, previous1, None def __iter__(self): for previous, token, next in self.slider(): @@ -58,7 +59,7 @@ def is_optional_start(self, tagname, previous, next): elif tagname == 'colgroup': # A colgroup element's start tag may be omitted if the first thing # inside the colgroup element is a col element, and if the element - # is not immediately preceeded by another colgroup element whose + # is not immediately preceded by another colgroup element whose # end tag has been omitted. 
if type in ("StartTag", "EmptyTag"): # XXX: we do not look at the preceding event, so instead we never @@ -70,7 +71,7 @@ def is_optional_start(self, tagname, previous, next): elif tagname == 'tbody': # A tbody element's start tag may be omitted if the first thing # inside the tbody element is a tr element, and if the element is - # not immediately preceeded by a tbody, thead, or tfoot element + # not immediately preceded by a tbody, thead, or tfoot element # whose end tag has been omitted. if type == "StartTag": # omit the thead and tfoot elements' end tag when they are diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index b206b54e..b5ddcb93 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -1,12 +1,865 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base -from ..sanitizer import HTMLSanitizerMixin +import re +from xml.sax.saxutils import escape, unescape +from six.moves import urllib_parse as urlparse + +from . 
import base +from ..constants import namespaces, prefixes + +__all__ = ["Filter"] + + +allowed_elements = frozenset(( + (namespaces['html'], 'a'), + (namespaces['html'], 'abbr'), + (namespaces['html'], 'acronym'), + (namespaces['html'], 'address'), + (namespaces['html'], 'area'), + (namespaces['html'], 'article'), + (namespaces['html'], 'aside'), + (namespaces['html'], 'audio'), + (namespaces['html'], 'b'), + (namespaces['html'], 'big'), + (namespaces['html'], 'blockquote'), + (namespaces['html'], 'br'), + (namespaces['html'], 'button'), + (namespaces['html'], 'canvas'), + (namespaces['html'], 'caption'), + (namespaces['html'], 'center'), + (namespaces['html'], 'cite'), + (namespaces['html'], 'code'), + (namespaces['html'], 'col'), + (namespaces['html'], 'colgroup'), + (namespaces['html'], 'command'), + (namespaces['html'], 'datagrid'), + (namespaces['html'], 'datalist'), + (namespaces['html'], 'dd'), + (namespaces['html'], 'del'), + (namespaces['html'], 'details'), + (namespaces['html'], 'dfn'), + (namespaces['html'], 'dialog'), + (namespaces['html'], 'dir'), + (namespaces['html'], 'div'), + (namespaces['html'], 'dl'), + (namespaces['html'], 'dt'), + (namespaces['html'], 'em'), + (namespaces['html'], 'event-source'), + (namespaces['html'], 'fieldset'), + (namespaces['html'], 'figcaption'), + (namespaces['html'], 'figure'), + (namespaces['html'], 'footer'), + (namespaces['html'], 'font'), + (namespaces['html'], 'form'), + (namespaces['html'], 'header'), + (namespaces['html'], 'h1'), + (namespaces['html'], 'h2'), + (namespaces['html'], 'h3'), + (namespaces['html'], 'h4'), + (namespaces['html'], 'h5'), + (namespaces['html'], 'h6'), + (namespaces['html'], 'hr'), + (namespaces['html'], 'i'), + (namespaces['html'], 'img'), + (namespaces['html'], 'input'), + (namespaces['html'], 'ins'), + (namespaces['html'], 'keygen'), + (namespaces['html'], 'kbd'), + (namespaces['html'], 'label'), + (namespaces['html'], 'legend'), + (namespaces['html'], 'li'), + (namespaces['html'], 
'm'), + (namespaces['html'], 'map'), + (namespaces['html'], 'menu'), + (namespaces['html'], 'meter'), + (namespaces['html'], 'multicol'), + (namespaces['html'], 'nav'), + (namespaces['html'], 'nextid'), + (namespaces['html'], 'ol'), + (namespaces['html'], 'output'), + (namespaces['html'], 'optgroup'), + (namespaces['html'], 'option'), + (namespaces['html'], 'p'), + (namespaces['html'], 'pre'), + (namespaces['html'], 'progress'), + (namespaces['html'], 'q'), + (namespaces['html'], 's'), + (namespaces['html'], 'samp'), + (namespaces['html'], 'section'), + (namespaces['html'], 'select'), + (namespaces['html'], 'small'), + (namespaces['html'], 'sound'), + (namespaces['html'], 'source'), + (namespaces['html'], 'spacer'), + (namespaces['html'], 'span'), + (namespaces['html'], 'strike'), + (namespaces['html'], 'strong'), + (namespaces['html'], 'sub'), + (namespaces['html'], 'sup'), + (namespaces['html'], 'table'), + (namespaces['html'], 'tbody'), + (namespaces['html'], 'td'), + (namespaces['html'], 'textarea'), + (namespaces['html'], 'time'), + (namespaces['html'], 'tfoot'), + (namespaces['html'], 'th'), + (namespaces['html'], 'thead'), + (namespaces['html'], 'tr'), + (namespaces['html'], 'tt'), + (namespaces['html'], 'u'), + (namespaces['html'], 'ul'), + (namespaces['html'], 'var'), + (namespaces['html'], 'video'), + (namespaces['mathml'], 'maction'), + (namespaces['mathml'], 'math'), + (namespaces['mathml'], 'merror'), + (namespaces['mathml'], 'mfrac'), + (namespaces['mathml'], 'mi'), + (namespaces['mathml'], 'mmultiscripts'), + (namespaces['mathml'], 'mn'), + (namespaces['mathml'], 'mo'), + (namespaces['mathml'], 'mover'), + (namespaces['mathml'], 'mpadded'), + (namespaces['mathml'], 'mphantom'), + (namespaces['mathml'], 'mprescripts'), + (namespaces['mathml'], 'mroot'), + (namespaces['mathml'], 'mrow'), + (namespaces['mathml'], 'mspace'), + (namespaces['mathml'], 'msqrt'), + (namespaces['mathml'], 'mstyle'), + (namespaces['mathml'], 'msub'), + (namespaces['mathml'], 
'msubsup'), + (namespaces['mathml'], 'msup'), + (namespaces['mathml'], 'mtable'), + (namespaces['mathml'], 'mtd'), + (namespaces['mathml'], 'mtext'), + (namespaces['mathml'], 'mtr'), + (namespaces['mathml'], 'munder'), + (namespaces['mathml'], 'munderover'), + (namespaces['mathml'], 'none'), + (namespaces['svg'], 'a'), + (namespaces['svg'], 'animate'), + (namespaces['svg'], 'animateColor'), + (namespaces['svg'], 'animateMotion'), + (namespaces['svg'], 'animateTransform'), + (namespaces['svg'], 'clipPath'), + (namespaces['svg'], 'circle'), + (namespaces['svg'], 'defs'), + (namespaces['svg'], 'desc'), + (namespaces['svg'], 'ellipse'), + (namespaces['svg'], 'font-face'), + (namespaces['svg'], 'font-face-name'), + (namespaces['svg'], 'font-face-src'), + (namespaces['svg'], 'g'), + (namespaces['svg'], 'glyph'), + (namespaces['svg'], 'hkern'), + (namespaces['svg'], 'linearGradient'), + (namespaces['svg'], 'line'), + (namespaces['svg'], 'marker'), + (namespaces['svg'], 'metadata'), + (namespaces['svg'], 'missing-glyph'), + (namespaces['svg'], 'mpath'), + (namespaces['svg'], 'path'), + (namespaces['svg'], 'polygon'), + (namespaces['svg'], 'polyline'), + (namespaces['svg'], 'radialGradient'), + (namespaces['svg'], 'rect'), + (namespaces['svg'], 'set'), + (namespaces['svg'], 'stop'), + (namespaces['svg'], 'svg'), + (namespaces['svg'], 'switch'), + (namespaces['svg'], 'text'), + (namespaces['svg'], 'title'), + (namespaces['svg'], 'tspan'), + (namespaces['svg'], 'use'), +)) + +allowed_attributes = frozenset(( + # HTML attributes + (None, 'abbr'), + (None, 'accept'), + (None, 'accept-charset'), + (None, 'accesskey'), + (None, 'action'), + (None, 'align'), + (None, 'alt'), + (None, 'autocomplete'), + (None, 'autofocus'), + (None, 'axis'), + (None, 'background'), + (None, 'balance'), + (None, 'bgcolor'), + (None, 'bgproperties'), + (None, 'border'), + (None, 'bordercolor'), + (None, 'bordercolordark'), + (None, 'bordercolorlight'), + (None, 'bottompadding'), + (None, 
'cellpadding'), + (None, 'cellspacing'), + (None, 'ch'), + (None, 'challenge'), + (None, 'char'), + (None, 'charoff'), + (None, 'choff'), + (None, 'charset'), + (None, 'checked'), + (None, 'cite'), + (None, 'class'), + (None, 'clear'), + (None, 'color'), + (None, 'cols'), + (None, 'colspan'), + (None, 'compact'), + (None, 'contenteditable'), + (None, 'controls'), + (None, 'coords'), + (None, 'data'), + (None, 'datafld'), + (None, 'datapagesize'), + (None, 'datasrc'), + (None, 'datetime'), + (None, 'default'), + (None, 'delay'), + (None, 'dir'), + (None, 'disabled'), + (None, 'draggable'), + (None, 'dynsrc'), + (None, 'enctype'), + (None, 'end'), + (None, 'face'), + (None, 'for'), + (None, 'form'), + (None, 'frame'), + (None, 'galleryimg'), + (None, 'gutter'), + (None, 'headers'), + (None, 'height'), + (None, 'hidefocus'), + (None, 'hidden'), + (None, 'high'), + (None, 'href'), + (None, 'hreflang'), + (None, 'hspace'), + (None, 'icon'), + (None, 'id'), + (None, 'inputmode'), + (None, 'ismap'), + (None, 'keytype'), + (None, 'label'), + (None, 'leftspacing'), + (None, 'lang'), + (None, 'list'), + (None, 'longdesc'), + (None, 'loop'), + (None, 'loopcount'), + (None, 'loopend'), + (None, 'loopstart'), + (None, 'low'), + (None, 'lowsrc'), + (None, 'max'), + (None, 'maxlength'), + (None, 'media'), + (None, 'method'), + (None, 'min'), + (None, 'multiple'), + (None, 'name'), + (None, 'nohref'), + (None, 'noshade'), + (None, 'nowrap'), + (None, 'open'), + (None, 'optimum'), + (None, 'pattern'), + (None, 'ping'), + (None, 'point-size'), + (None, 'poster'), + (None, 'pqg'), + (None, 'preload'), + (None, 'prompt'), + (None, 'radiogroup'), + (None, 'readonly'), + (None, 'rel'), + (None, 'repeat-max'), + (None, 'repeat-min'), + (None, 'replace'), + (None, 'required'), + (None, 'rev'), + (None, 'rightspacing'), + (None, 'rows'), + (None, 'rowspan'), + (None, 'rules'), + (None, 'scope'), + (None, 'selected'), + (None, 'shape'), + (None, 'size'), + (None, 'span'), + (None, 'src'), + 
(None, 'start'), + (None, 'step'), + (None, 'style'), + (None, 'summary'), + (None, 'suppress'), + (None, 'tabindex'), + (None, 'target'), + (None, 'template'), + (None, 'title'), + (None, 'toppadding'), + (None, 'type'), + (None, 'unselectable'), + (None, 'usemap'), + (None, 'urn'), + (None, 'valign'), + (None, 'value'), + (None, 'variable'), + (None, 'volume'), + (None, 'vspace'), + (None, 'vrml'), + (None, 'width'), + (None, 'wrap'), + (namespaces['xml'], 'lang'), + # MathML attributes + (None, 'actiontype'), + (None, 'align'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnlines'), + (None, 'columnspacing'), + (None, 'columnspan'), + (None, 'depth'), + (None, 'display'), + (None, 'displaystyle'), + (None, 'equalcolumns'), + (None, 'equalrows'), + (None, 'fence'), + (None, 'fontstyle'), + (None, 'fontweight'), + (None, 'frame'), + (None, 'height'), + (None, 'linethickness'), + (None, 'lspace'), + (None, 'mathbackground'), + (None, 'mathcolor'), + (None, 'mathvariant'), + (None, 'mathvariant'), + (None, 'maxsize'), + (None, 'minsize'), + (None, 'other'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowlines'), + (None, 'rowspacing'), + (None, 'rowspan'), + (None, 'rspace'), + (None, 'scriptlevel'), + (None, 'selection'), + (None, 'separator'), + (None, 'stretchy'), + (None, 'width'), + (None, 'width'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'type'), + # SVG attributes + (None, 'accent-height'), + (None, 'accumulate'), + (None, 'additive'), + (None, 'alphabetic'), + (None, 'arabic-form'), + (None, 'ascent'), + (None, 'attributeName'), + (None, 'attributeType'), + (None, 'baseProfile'), + (None, 'bbox'), + (None, 'begin'), + (None, 'by'), + (None, 'calcMode'), + (None, 'cap-height'), + (None, 'class'), + (None, 'clip-path'), + (None, 'color'), + (None, 'color-rendering'), + (None, 'content'), + (None, 'cx'), + (None, 'cy'), + (None, 'd'), + 
(None, 'dx'), + (None, 'dy'), + (None, 'descent'), + (None, 'display'), + (None, 'dur'), + (None, 'end'), + (None, 'fill'), + (None, 'fill-opacity'), + (None, 'fill-rule'), + (None, 'font-family'), + (None, 'font-size'), + (None, 'font-stretch'), + (None, 'font-style'), + (None, 'font-variant'), + (None, 'font-weight'), + (None, 'from'), + (None, 'fx'), + (None, 'fy'), + (None, 'g1'), + (None, 'g2'), + (None, 'glyph-name'), + (None, 'gradientUnits'), + (None, 'hanging'), + (None, 'height'), + (None, 'horiz-adv-x'), + (None, 'horiz-origin-x'), + (None, 'id'), + (None, 'ideographic'), + (None, 'k'), + (None, 'keyPoints'), + (None, 'keySplines'), + (None, 'keyTimes'), + (None, 'lang'), + (None, 'marker-end'), + (None, 'marker-mid'), + (None, 'marker-start'), + (None, 'markerHeight'), + (None, 'markerUnits'), + (None, 'markerWidth'), + (None, 'mathematical'), + (None, 'max'), + (None, 'min'), + (None, 'name'), + (None, 'offset'), + (None, 'opacity'), + (None, 'orient'), + (None, 'origin'), + (None, 'overline-position'), + (None, 'overline-thickness'), + (None, 'panose-1'), + (None, 'path'), + (None, 'pathLength'), + (None, 'points'), + (None, 'preserveAspectRatio'), + (None, 'r'), + (None, 'refX'), + (None, 'refY'), + (None, 'repeatCount'), + (None, 'repeatDur'), + (None, 'requiredExtensions'), + (None, 'requiredFeatures'), + (None, 'restart'), + (None, 'rotate'), + (None, 'rx'), + (None, 'ry'), + (None, 'slope'), + (None, 'stemh'), + (None, 'stemv'), + (None, 'stop-color'), + (None, 'stop-opacity'), + (None, 'strikethrough-position'), + (None, 'strikethrough-thickness'), + (None, 'stroke'), + (None, 'stroke-dasharray'), + (None, 'stroke-dashoffset'), + (None, 'stroke-linecap'), + (None, 'stroke-linejoin'), + (None, 'stroke-miterlimit'), + (None, 'stroke-opacity'), + (None, 'stroke-width'), + (None, 'systemLanguage'), + (None, 'target'), + (None, 'text-anchor'), + (None, 'to'), + (None, 'transform'), + (None, 'type'), + (None, 'u1'), + (None, 'u2'), + (None, 
'underline-position'), + (None, 'underline-thickness'), + (None, 'unicode'), + (None, 'unicode-range'), + (None, 'units-per-em'), + (None, 'values'), + (None, 'version'), + (None, 'viewBox'), + (None, 'visibility'), + (None, 'width'), + (None, 'widths'), + (None, 'x'), + (None, 'x-height'), + (None, 'x1'), + (None, 'x2'), + (namespaces['xlink'], 'actuate'), + (namespaces['xlink'], 'arcrole'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'role'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'title'), + (namespaces['xlink'], 'type'), + (namespaces['xml'], 'base'), + (namespaces['xml'], 'lang'), + (namespaces['xml'], 'space'), + (None, 'y'), + (None, 'y1'), + (None, 'y2'), + (None, 'zoomAndPan'), +)) + +attr_val_is_uri = frozenset(( + (None, 'href'), + (None, 'src'), + (None, 'cite'), + (None, 'action'), + (None, 'longdesc'), + (None, 'poster'), + (None, 'background'), + (None, 'datasrc'), + (None, 'dynsrc'), + (None, 'lowsrc'), + (None, 'ping'), + (namespaces['xlink'], 'href'), + (namespaces['xml'], 'base'), +)) + +svg_attr_val_allows_ref = frozenset(( + (None, 'clip-path'), + (None, 'color-profile'), + (None, 'cursor'), + (None, 'fill'), + (None, 'filter'), + (None, 'marker'), + (None, 'marker-start'), + (None, 'marker-mid'), + (None, 'marker-end'), + (None, 'mask'), + (None, 'stroke'), +)) + +svg_allow_local_href = frozenset(( + (None, 'altGlyph'), + (None, 'animate'), + (None, 'animateColor'), + (None, 'animateMotion'), + (None, 'animateTransform'), + (None, 'cursor'), + (None, 'feImage'), + (None, 'filter'), + (None, 'linearGradient'), + (None, 'pattern'), + (None, 'radialGradient'), + (None, 'textpath'), + (None, 'tref'), + (None, 'set'), + (None, 'use') +)) + +allowed_css_properties = frozenset(( + 'azimuth', + 'background-color', + 'border-bottom-color', + 'border-collapse', + 'border-color', + 'border-left-color', + 'border-right-color', + 'border-top-color', + 'clear', + 'color', + 'cursor', + 'direction', + 'display', + 'elevation', 
+ 'float', + 'font', + 'font-family', + 'font-size', + 'font-style', + 'font-variant', + 'font-weight', + 'height', + 'letter-spacing', + 'line-height', + 'overflow', + 'pause', + 'pause-after', + 'pause-before', + 'pitch', + 'pitch-range', + 'richness', + 'speak', + 'speak-header', + 'speak-numeral', + 'speak-punctuation', + 'speech-rate', + 'stress', + 'text-align', + 'text-decoration', + 'text-indent', + 'unicode-bidi', + 'vertical-align', + 'voice-family', + 'volume', + 'white-space', + 'width', +)) + +allowed_css_keywords = frozenset(( + 'auto', + 'aqua', + 'black', + 'block', + 'blue', + 'bold', + 'both', + 'bottom', + 'brown', + 'center', + 'collapse', + 'dashed', + 'dotted', + 'fuchsia', + 'gray', + 'green', + '!important', + 'italic', + 'left', + 'lime', + 'maroon', + 'medium', + 'none', + 'navy', + 'normal', + 'nowrap', + 'olive', + 'pointer', + 'purple', + 'red', + 'right', + 'solid', + 'silver', + 'teal', + 'top', + 'transparent', + 'underline', + 'white', + 'yellow', +)) + +allowed_svg_properties = frozenset(( + 'fill', + 'fill-opacity', + 'fill-rule', + 'stroke', + 'stroke-width', + 'stroke-linecap', + 'stroke-linejoin', + 'stroke-opacity', +)) + +allowed_protocols = frozenset(( + 'ed2k', + 'ftp', + 'http', + 'https', + 'irc', + 'mailto', + 'news', + 'gopher', + 'nntp', + 'telnet', + 'webcal', + 'xmpp', + 'callto', + 'feed', + 'urn', + 'aim', + 'rsync', + 'tag', + 'ssh', + 'sftp', + 'rtsp', + 'afs', + 'data', +)) + +allowed_content_types = frozenset(( + 'image/png', + 'image/jpeg', + 'image/gif', + 'image/webp', + 'image/bmp', + 'text/plain', +)) + + +data_content_type = re.compile(r''' + ^ + # Match a content type <application>/<type> + (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) + # Match any character set and encoding + (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) + |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) 
+ # Assume the rest is data + ,.* + $ + ''', + re.VERBOSE) + + +class Filter(base.Filter): + """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" + def __init__(self, + source, + allowed_elements=allowed_elements, + allowed_attributes=allowed_attributes, + allowed_css_properties=allowed_css_properties, + allowed_css_keywords=allowed_css_keywords, + allowed_svg_properties=allowed_svg_properties, + allowed_protocols=allowed_protocols, + allowed_content_types=allowed_content_types, + attr_val_is_uri=attr_val_is_uri, + svg_attr_val_allows_ref=svg_attr_val_allows_ref, + svg_allow_local_href=svg_allow_local_href): + super(Filter, self).__init__(source) + self.allowed_elements = allowed_elements + self.allowed_attributes = allowed_attributes + self.allowed_css_properties = allowed_css_properties + self.allowed_css_keywords = allowed_css_keywords + self.allowed_svg_properties = allowed_svg_properties + self.allowed_protocols = allowed_protocols + self.allowed_content_types = allowed_content_types + self.attr_val_is_uri = attr_val_is_uri + self.svg_attr_val_allows_ref = svg_attr_val_allows_ref + self.svg_allow_local_href = svg_allow_local_href -class Filter(_base.Filter, HTMLSanitizerMixin): def __iter__(self): - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): token = self.sanitize_token(token) if token: yield token + + # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and + # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style + # attributes are parsed, and a restricted set, # specified by + # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through. + # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified + # in ALLOWED_PROTOCOLS are allowed. 
+ # + # sanitize_html('<script> do_nasty_stuff() </script>') + # => &lt;script> do_nasty_stuff() &lt;/script> + # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>') + # => <a>Click here for $100</a> + def sanitize_token(self, token): + + # accommodate filters which use token_type differently + token_type = token["type"] + if token_type in ("StartTag", "EndTag", "EmptyTag"): + name = token["name"] + namespace = token["namespace"] + if ((namespace, name) in self.allowed_elements or + (namespace is None and + (namespaces["html"], name) in self.allowed_elements)): + return self.allowed_token(token) + else: + return self.disallowed_token(token) + elif token_type == "Comment": + pass + else: + return token + + def allowed_token(self, token): + if "data" in token: + attrs = token["data"] + attr_names = set(attrs.keys()) + + # Remove forbidden attributes + for to_remove in (attr_names - self.allowed_attributes): + del token["data"][to_remove] + attr_names.remove(to_remove) + + # Remove attributes with disallowed URL values + for attr in (attr_names & self.attr_val_is_uri): + assert attr in attrs + # I don't have a clue where this regexp comes from or why it matches those + # characters, nor why we call unescape. I just know it's always been here. + # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all + # this will do is remove *more* than it otherwise would. 
+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = data_content_type.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: + attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + (namespaces['xlink'], 'href') in attrs and re.search('^\s*[^#\s].*', + attrs[(namespaces['xlink'], 'href')])): + del attrs[(namespaces['xlink'], 'href')] + if (None, 'style') in attrs: + attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) + token["data"] = attrs + return token + + def disallowed_token(self, token): + token_type = token["type"] + if token_type == "EndTag": + token["data"] = "</%s>" % token["name"] + elif token["data"]: + assert token_type in ("StartTag", "EmptyTag") + attrs = [] + for (ns, name), v in token["data"].items(): + attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) + token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + token["type"] = "Characters" + + del token["name"] + return token + + def sanitize_css(self, style): + # disallow urls + style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet + if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): + 
return '' + + clean = [] + for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if keyword not in self.allowed_css_keywords and \ + not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py index dfc60eeb..89210528 100644 --- a/html5lib/filters/whitespace.py +++ b/html5lib/filters/whitespace.py @@ -2,20 +2,20 @@ import re -from . import _base +from . import base from ..constants import rcdataElements, spaceCharacters spaceCharacters = "".join(spaceCharacters) SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) -class Filter(_base.Filter): +class Filter(base.Filter): spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) def __iter__(self): preserve = 0 - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag" \ and (preserve or token["name"] in self.spacePreserveElements): diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 12aa6a35..2abd63e4 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,39 +1,44 @@ from __future__ import absolute_import, division, unicode_literals -from six import with_metaclass +from six import with_metaclass, viewkeys, PY3 import types -from . import inputstream -from . import tokenizer +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + +from . import _inputstream +from . 
import _tokenizer from . import treebuilders -from .treebuilders._base import Marker - -from . import utils -from . import constants -from .constants import spaceCharacters, asciiUpper2Lower -from .constants import specialElements -from .constants import headingElements -from .constants import cdataElements, rcdataElements -from .constants import tokenTypes, ReparseException, namespaces -from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements -from .constants import adjustForeignAttributes as adjustForeignAttributesMap -from .constants import E - - -def parse(doc, treebuilder="etree", encoding=None, - namespaceHTMLElements=True): +from .treebuilders.base import Marker + +from . import _utils +from .constants import ( + spaceCharacters, asciiUpper2Lower, + specialElements, headingElements, cdataElements, rcdataElements, + tokenTypes, tagTokenTypes, + namespaces, + htmlIntegrationPointElements, mathmlTextIntegrationPointElements, + adjustForeignAttributes as adjustForeignAttributesMap, + adjustMathMLAttributes, adjustSVGAttributes, + E, + ReparseException +) + + +def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): """Parse a string or file-like object into a tree""" tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parse(doc, encoding=encoding) + return p.parse(doc, **kwargs) -def parseFragment(doc, container="div", treebuilder="etree", encoding=None, - namespaceHTMLElements=True): +def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parseFragment(doc, container=container, encoding=encoding) + return p.parseFragment(doc, container=container, **kwargs) def method_decorator_metaclass(function): @@ -52,18 +57,13 @@ class HTMLParser(object): """HTML parser. 
Generates a tree structure from a stream of (possibly malformed) HTML""" - def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer, - strict=False, namespaceHTMLElements=True, debug=False): + def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): """ strict - raise an exception when a parse error is encountered tree - a treebuilder class controlling the type of tree that will be returned. Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) - - tokenizer - a class that provides a stream of tokens to the treebuilder. - This may be replaced for e.g. a sanitizer which converts some tags to - text """ # Raise an exception on the first error encountered @@ -72,29 +72,24 @@ def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer, if tree is None: tree = treebuilders.getTreeBuilder("etree") self.tree = tree(namespaceHTMLElements) - self.tokenizer_class = tokenizer self.errors = [] self.phases = dict([(name, cls(self, self.tree)) for name, cls in getPhases(debug).items()]) - def _parse(self, stream, innerHTML=False, container="div", - encoding=None, parseMeta=True, useChardet=True, **kwargs): + def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): self.innerHTMLMode = innerHTML self.container = container - self.tokenizer = self.tokenizer_class(stream, encoding=encoding, - parseMeta=parseMeta, - useChardet=useChardet, - parser=self, **kwargs) + self.scripting = scripting + self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) self.reset() - while True: - try: - self.mainLoop() - break - except ReparseException: - self.reset() + try: + self.mainLoop() + except ReparseException: + self.reset() + self.mainLoop() def reset(self): self.tree.reset() @@ -121,7 +116,7 @@ def reset(self): self.phase.insertHtmlElement() self.resetInsertionMode() else: - self.innerHTML = False + self.innerHTML = False # pylint:disable=redefined-variable-type 
self.phase = self.phases["initial"] self.lastPhase = None @@ -139,7 +134,7 @@ def documentEncoding(self): """ if not hasattr(self, 'tokenizer'): return None - return self.tokenizer.stream.charEncoding[0] + return self.tokenizer.stream.charEncoding[0].name def isHTMLIntegrationPoint(self, element): if (element.name == "annotation-xml" and @@ -164,8 +159,10 @@ def mainLoop(self): ParseErrorToken = tokenTypes["ParseError"] for token in self.normalizedTokens(): + prev_token = None new_token = token while new_token is not None: + prev_token = new_token currentNode = self.tree.openElements[-1] if self.tree.openElements else None currentNodeNamespace = currentNode.namespace if currentNode else None currentNodeName = currentNode.name if currentNode else None @@ -184,6 +181,7 @@ def mainLoop(self): type in (CharactersToken, SpaceCharactersToken))) or (currentNodeNamespace == namespaces["mathml"] and currentNodeName == "annotation-xml" and + type == StartTagToken and token["name"] == "svg") or (self.isHTMLIntegrationPoint(currentNode) and type in (StartTagToken, CharactersToken, SpaceCharactersToken))): @@ -204,10 +202,10 @@ def mainLoop(self): elif type == DoctypeToken: new_token = phase.processDoctype(new_token) - if (type == StartTagToken and token["selfClosing"] - and not token["selfClosingAcknowledged"]): + if (type == StartTagToken and prev_token["selfClosing"] and + not prev_token["selfClosingAcknowledged"]): self.parseError("non-void-element-with-trailing-solidus", - {"name": token["name"]}) + {"name": prev_token["name"]}) # When the loop finishes it's EOF reprocess = True @@ -222,7 +220,7 @@ def normalizedTokens(self): for token in self.tokenizer: yield self.normalizeToken(token) - def parse(self, stream, encoding=None, parseMeta=True, useChardet=True): + def parse(self, stream, *args, **kwargs): """Parse a HTML document into a well-formed tree stream - a filelike object or string containing the HTML to be parsed @@ -231,13 +229,13 @@ def parse(self, stream, 
encoding=None, parseMeta=True, useChardet=True): the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) + + scripting - treat noscript elements as if javascript was turned on """ - self._parse(stream, innerHTML=False, encoding=encoding, - parseMeta=parseMeta, useChardet=useChardet) + self._parse(stream, False, None, *args, **kwargs) return self.tree.getDocument() - def parseFragment(self, stream, container="div", encoding=None, - parseMeta=False, useChardet=True): + def parseFragment(self, stream, *args, **kwargs): """Parse a HTML fragment into a well-formed tree fragment container - name of the element we're setting the innerHTML property @@ -249,12 +247,16 @@ def parseFragment(self, stream, container="div", encoding=None, the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) + + scripting - treat noscript elements as if javascript was turned on """ - self._parse(stream, True, container=container, encoding=encoding) + self._parse(stream, True, *args, **kwargs) return self.tree.getFragment() - def parseError(self, errorcode="XXX-undefined-error", datavars={}): + def parseError(self, errorcode="XXX-undefined-error", datavars=None): # XXX The idea is to make errorcode mandatory. 
+ if datavars is None: + datavars = {} self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) if self.strict: raise ParseError(E[errorcode] % datavars) @@ -263,98 +265,25 @@ def normalizeToken(self, token): """ HTML5 specific normalizations to the token stream """ if token["type"] == tokenTypes["StartTag"]: - token["data"] = dict(token["data"][::-1]) + raw = token["data"] + token["data"] = OrderedDict(raw) + if len(raw) > len(token["data"]): + # we had some duplicated attribute, fix so first wins + token["data"].update(raw[::-1]) return token def adjustMathMLAttributes(self, token): - replacements = {"definitionurl": "definitionURL"} - for k, v in replacements.items(): - if k in token["data"]: - token["data"][v] = token["data"][k] - del token["data"][k] + adjust_attributes(token, adjustMathMLAttributes) def adjustSVGAttributes(self, token): - replacements = { - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - 
"patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan" - } - for originalName in list(token["data"].keys()): - if originalName in replacements: - svgName = replacements[originalName] - token["data"][svgName] = token["data"][originalName] - del token["data"][originalName] + adjust_attributes(token, adjustSVGAttributes) def adjustForeignAttributes(self, token): - replacements = adjustForeignAttributesMap - - for originalName in token["data"].keys(): - if originalName in replacements: - foreignName = replacements[originalName] - token["data"][foreignName] = token["data"][originalName] - del token["data"][originalName] + adjust_attributes(token, adjustForeignAttributesMap) def reparseTokenNormal(self, token): + # pylint:disable=unused-argument self.parser.phase() def resetInsertionMode(self): @@ -419,11 +348,12 @@ def parseRCDataRawtext(self, token, contentType): self.phase = self.phases["text"] +@_utils.memoize def getPhases(debug): def log(function): """Logger that records which phase processes each token""" 
type_names = dict((value, key) for key, value in - constants.tokenTypes.items()) + tokenTypes.items()) def wrapped(self, *args, **kwargs): if function.__name__.startswith("process") and len(args) > 0: @@ -432,7 +362,7 @@ def wrapped(self, *args, **kwargs): info = {"type": type_names[token['type']]} except: raise - if token['type'] in constants.tagTokenTypes: + if token['type'] in tagTokenTypes: info["name"] = token['name'] self.parser.log.append((self.parser.tokenizer.state.__name__, @@ -451,6 +381,7 @@ def getMetaclass(use_metaclass, metaclass_func): else: return type + # pylint:disable=unused-argument class Phase(with_metaclass(getMetaclass(debug, log))): """Base class for helper object that implements each phase of processing """ @@ -517,77 +448,76 @@ def processDoctype(self, token): if publicId != "": publicId = publicId.translate(asciiUpper2Lower) - if (not correct or token["name"] != "html" - or publicId.startswith( - ("+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd 
internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//")) - or publicId in - ("-//w3o//dtd w3 html strict 3.0//en//", - "-/w3c/dtd html 4.0 transitional/en", - "html") - or publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is None - or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + if (not correct or token["name"] != "html" or + publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 
strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. 
corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) or + publicId in ("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None or + systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): self.parser.compatMode = "quirks" elif (publicId.startswith( ("-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//")) - or publicId.startswith( + "-//w3c//dtd xhtml 1.0 transitional//")) or + publicId.startswith( ("-//w3c//dtd html 4.01 frameset//", "-//w3c//dtd html 4.01 transitional//")) and systemId is not None): @@ -660,13 +590,13 @@ class BeforeHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("head", self.startTagHead) ]) 
self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("head", "body", "html", "br"), self.endTagImplyHead) ]) self.endTagHandler.default = self.endTagOther @@ -706,10 +636,11 @@ class InHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("title", self.startTagTitle), - (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle), + (("noframes", "style"), self.startTagNoFramesStyle), + ("noscript", self.startTagNoscript), ("script", self.startTagScript), (("base", "basefont", "bgsound", "command", "link"), self.startTagBaseLinkCommand), @@ -718,7 +649,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self. endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("head", self.endTagHead), (("br", "html", "body"), self.endTagHtmlBodyBr) ]) @@ -760,18 +691,25 @@ def startTagMeta(self, token): # the abstract Unicode string, and just use the # ContentAttrParser on that, but using UTF-8 allows all chars # to be encoded and as a ASCII-superset works. 
- data = inputstream.EncodingBytes(attributes["content"].encode("utf-8")) - parser = inputstream.ContentAttrParser(data) + data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = _inputstream.ContentAttrParser(data) codec = parser.parse() self.parser.tokenizer.stream.changeEncoding(codec) def startTagTitle(self, token): self.parser.parseRCDataRawtext(token, "RCDATA") - def startTagNoScriptNoFramesStyle(self, token): + def startTagNoFramesStyle(self, token): # Need to decide whether to implement the scripting-disabled case self.parser.parseRCDataRawtext(token, "RAWTEXT") + def startTagNoscript(self, token): + if self.parser.scripting: + self.parser.parseRCDataRawtext(token, "RAWTEXT") + else: + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inHeadNoscript"] + def startTagScript(self, token): self.tree.insertElement(token) self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState @@ -797,15 +735,75 @@ def endTagOther(self, token): def anythingElse(self): self.endTagHead(impliedTagToken("head")) - # XXX If we implement a parser for which scripting is disabled we need to - # implement this phase. 
- # - # class InHeadNoScriptPhase(Phase): + class InHeadNoscriptPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), + (("head", "noscript"), self.startTagHeadNoscript), + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("noscript", self.endTagNoscript), + ("br", self.endTagBr), + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.parser.parseError("eof-in-head-noscript") + self.anythingElse() + return True + + def processComment(self, token): + return self.parser.phases["inHead"].processComment(token) + + def processCharacters(self, token): + self.parser.parseError("char-in-head-noscript") + self.anythingElse() + return token + + def processSpaceCharacters(self, token): + return self.parser.phases["inHead"].processSpaceCharacters(token) + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBaseLinkCommand(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagHeadNoscript(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagNoscript(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "noscript", "Expected noscript got %s" % node.name + self.parser.phase = self.parser.phases["inHead"] + + def endTagBr(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + 
def anythingElse(self): + # Caller must raise parse error first! + self.endTagNoscript(impliedTagToken("noscript")) + class AfterHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("body", self.startTagBody), ("frameset", self.startTagFrameset), @@ -815,8 +813,8 @@ def __init__(self, parser, tree): ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), - self.endTagHtmlBodyBr)]) + self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHtmlBodyBr)]) self.endTagHandler.default = self.endTagOther def processEOF(self): @@ -874,10 +872,10 @@ class InBodyPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - # Keep a ref to this for special handling of whitespace in

-            self.processSpaceCharactersNonPre = self.processSpaceCharacters
+            # Set this to the default handler
+            self.processSpaceCharacters = self.processSpaceCharactersNonPre
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 (("base", "basefont", "bgsound", "command", "link", "meta",
                   "script", "style", "title"),
@@ -885,7 +883,7 @@ def __init__(self, parser, tree):
                 ("body", self.startTagBody),
                 ("frameset", self.startTagFrameset),
                 (("address", "article", "aside", "blockquote", "center", "details",
-                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+                  "dir", "div", "dl", "fieldset", "figcaption", "figure",
                   "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
                   "section", "summary", "ul"),
                  self.startTagCloseP),
@@ -911,7 +909,8 @@ def __init__(self, parser, tree):
                 ("isindex", self.startTagIsIndex),
                 ("textarea", self.startTagTextarea),
                 ("iframe", self.startTagIFrame),
-                (("noembed", "noframes", "noscript"), self.startTagRawtext),
+                ("noscript", self.startTagNoscript),
+                (("noembed", "noframes"), self.startTagRawtext),
                 ("select", self.startTagSelect),
                 (("rp", "rt"), self.startTagRpRt),
                 (("option", "optgroup"), self.startTagOpt),
@@ -923,7 +922,7 @@ def __init__(self, parser, tree):
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("body", self.endTagBody),
                 ("html", self.endTagHtml),
                 (("address", "article", "aside", "blockquote", "button", "center",
@@ -942,17 +941,9 @@ def __init__(self, parser, tree):
             self.endTagHandler.default = self.endTagOther
 
         def isMatchingFormattingElement(self, node1, node2):
-            if node1.name != node2.name or node1.namespace != node2.namespace:
-                return False
-            elif len(node1.attributes) != len(node2.attributes):
-                return False
-            else:
-                attributes1 = sorted(node1.attributes.items())
-                attributes2 = sorted(node2.attributes.items())
-                for attr1, attr2 in zip(attributes1, attributes2):
-                    if attr1 != attr2:
-                        return False
-            return True
+            return (node1.name == node2.name and
+                    node1.namespace == node2.namespace and
+                    node1.attributes == node2.attributes)
 
         # helper
         def addFormattingElement(self, token):
@@ -988,8 +979,8 @@ def processSpaceCharactersDropNewline(self, token):
             data = token["data"]
             self.processSpaceCharacters = self.processSpaceCharactersNonPre
             if (data.startswith("\n") and
-                self.tree.openElements[-1].name in ("pre", "listing", "textarea")
-                    and not self.tree.openElements[-1].hasContent()):
+                self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
+                    not self.tree.openElements[-1].hasContent()):
                 data = data[1:]
             if data:
                 self.tree.reconstructActiveFormattingElements()
@@ -1007,7 +998,7 @@ def processCharacters(self, token):
                      for char in token["data"]])):
                 self.parser.framesetOK = False
 
-        def processSpaceCharacters(self, token):
+        def processSpaceCharactersNonPre(self, token):
             self.tree.reconstructActiveFormattingElements()
             self.tree.insertText(token["data"])
 
@@ -1016,8 +1007,8 @@ def startTagProcessInHead(self, token):
 
         def startTagBody(self, token):
             self.parser.parseError("unexpected-start-tag", {"name": "body"})
-            if (len(self.tree.openElements) == 1
-                    or self.tree.openElements[1].name != "body"):
+            if (len(self.tree.openElements) == 1 or
+                    self.tree.openElements[1].name != "body"):
                 assert self.parser.innerHTML
             else:
                 self.parser.framesetOK = False
@@ -1232,6 +1223,12 @@ def startTagIFrame(self, token):
             self.parser.framesetOK = False
             self.startTagRawtext(token)
 
+        def startTagNoscript(self, token):
+            if self.parser.scripting:
+                self.startTagRawtext(token)
+            else:
+                self.startTagOther(token)
+
         def startTagRawtext(self, token):
             """iframe, noembed noframes, noscript(if scripting enabled)"""
             self.parser.parseRCDataRawtext(token, "RAWTEXT")
@@ -1327,7 +1324,7 @@ def endTagBody(self, token):
                         # Not sure this is the correct name for the parse error
                         self.parser.parseError(
                             "expected-one-end-tag-but-got-another",
-                            {"expectedName": "body", "gotName": node.name})
+                            {"gotName": "body", "expectedName": node.name})
                         break
             self.parser.phase = self.parser.phases["afterBody"]
 
@@ -1595,9 +1592,9 @@ def endTagOther(self, token):
     class TextPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([])
+            self.startTagHandler = _utils.MethodDispatcher([])
             self.startTagHandler.default = self.startTagOther
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("script", self.endTagScript)])
             self.endTagHandler.default = self.endTagOther
 
@@ -1629,7 +1626,7 @@ class InTablePhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-table
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("caption", self.startTagCaption),
                 ("colgroup", self.startTagColgroup),
@@ -1643,7 +1640,7 @@ def __init__(self, parser, tree):
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("table", self.endTagTable),
                 (("body", "caption", "col", "colgroup", "html", "tbody", "td",
                   "tfoot", "th", "thead", "tr"), self.endTagIgnore)
@@ -1820,14 +1817,14 @@ class InCaptionPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                   "thead", "tr"), self.startTagTableElement)
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("caption", self.endTagCaption),
                 ("table", self.endTagTable),
                 (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
@@ -1892,13 +1889,13 @@ class InColumnGroupPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("col", self.startTagCol)
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("colgroup", self.endTagColgroup),
                 ("col", self.endTagCol)
             ])
@@ -1926,6 +1923,7 @@ def processCharacters(self, token):
         def startTagCol(self, token):
             self.tree.insertElement(token)
             self.tree.openElements.pop()
+            token["selfClosingAcknowledged"] = True
 
         def startTagOther(self, token):
             ignoreEndTag = self.ignoreEndTagColgroup()
@@ -1955,7 +1953,7 @@ class InTableBodyPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("tr", self.startTagTr),
                 (("td", "th"), self.startTagTableCell),
@@ -1964,7 +1962,7 @@ def __init__(self, parser, tree):
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                 ("table", self.endTagTable),
                 (("body", "caption", "col", "colgroup", "html", "td", "th",
@@ -2053,7 +2051,7 @@ class InRowPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-row
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 (("td", "th"), self.startTagTableCell),
                 (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
@@ -2061,7 +2059,7 @@ def __init__(self, parser, tree):
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("tr", self.endTagTr),
                 ("table", self.endTagTable),
                 (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
@@ -2142,14 +2140,14 @@ class InCellPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                   "thead", "tr"), self.startTagTableOther)
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 (("td", "th"), self.endTagTableCell),
                 (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
                 (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
@@ -2218,7 +2216,7 @@ class InSelectPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("option", self.startTagOption),
                 ("optgroup", self.startTagOptgroup),
@@ -2228,7 +2226,7 @@ def __init__(self, parser, tree):
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("option", self.endTagOption),
                 ("optgroup", self.endTagOptgroup),
                 ("select", self.endTagSelect)
@@ -2318,13 +2316,13 @@ class InSelectInTablePhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
                  self.startTagTable)
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
                  self.endTagTable)
             ])
@@ -2445,7 +2443,7 @@ def processStartTag(self, token):
         def processEndTag(self, token):
             nodeIndex = len(self.tree.openElements) - 1
             node = self.tree.openElements[-1]
-            if node.name != token["name"]:
+            if node.name.translate(asciiUpper2Lower) != token["name"]:
                 self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
             while True:
@@ -2472,12 +2470,12 @@ class AfterBodyPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml)
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
+            self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
             self.endTagHandler.default = self.endTagOther
 
         def processEOF(self):
@@ -2520,7 +2518,7 @@ class InFramesetPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("frameset", self.startTagFrameset),
                 ("frame", self.startTagFrame),
@@ -2528,7 +2526,7 @@ def __init__(self, parser, tree):
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("frameset", self.endTagFrameset)
             ])
             self.endTagHandler.default = self.endTagOther
@@ -2564,7 +2562,7 @@ def endTagFrameset(self, token):
                 self.tree.openElements.pop()
             if (not self.parser.innerHTML and
                     self.tree.openElements[-1].name != "frameset"):
-                # If we're not in innerHTML mode and the the current node is not a
+                # If we're not in innerHTML mode and the current node is not a
                 # "frameset" element (anymore) then switch.
                 self.parser.phase = self.parser.phases["afterFrameset"]
 
@@ -2577,13 +2575,13 @@ class AfterFramesetPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("noframes", self.startTagNoframes)
             ])
             self.startTagHandler.default = self.startTagOther
 
-            self.endTagHandler = utils.MethodDispatcher([
+            self.endTagHandler = _utils.MethodDispatcher([
                 ("html", self.endTagHtml)
             ])
             self.endTagHandler.default = self.endTagOther
@@ -2613,7 +2611,7 @@ class AfterAfterBodyPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml)
             ])
             self.startTagHandler.default = self.startTagOther
@@ -2651,7 +2649,7 @@ class AfterAfterFramesetPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler = utils.MethodDispatcher([
+            self.startTagHandler = _utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("noframes", self.startTagNoFrames)
             ])
@@ -2682,13 +2680,14 @@ def startTagOther(self, token):
         def processEndTag(self, token):
             self.parser.parseError("expected-eof-but-got-end-tag",
                                    {"name": token["name"]})
+    # pylint:enable=unused-argument
 
     return {
         "initial": InitialPhase,
         "beforeHtml": BeforeHtmlPhase,
         "beforeHead": BeforeHeadPhase,
         "inHead": InHeadPhase,
-        # XXX "inHeadNoscript": InHeadNoScriptPhase,
+        "inHeadNoscript": InHeadNoscriptPhase,
         "afterHead": AfterHeadPhase,
         "inBody": InBodyPhase,
         "text": TextPhase,
@@ -2711,6 +2710,16 @@ def processEndTag(self, token):
     }
 
 
+def adjust_attributes(token, replacements):
+    if PY3 or _utils.PY27:
+        needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
+    else:
+        needs_adjustment = frozenset(token['data']) & frozenset(replacements)
+    if needs_adjustment:
+        token['data'] = OrderedDict((replacements.get(k, k), v)
+                                    for k, v in token['data'].items())
+
+
 def impliedTagToken(name, type="EndTag", attributes=None,
                     selfClosing=False):
     if attributes is None:
diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
deleted file mode 100644
index b714e8c9..00000000
--- a/html5lib/sanitizer.py
+++ /dev/null
@@ -1,300 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-from xml.sax.saxutils import escape, unescape
-from six.moves import urllib_parse as urlparse
-
-from .tokenizer import HTMLTokenizer
-from .constants import tokenTypes
-
-
-content_type_rgx = re.compile(r'''
-                               ^
-                               # Match a content type <application>/<type>
-                               (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
-                               # Match any character set and encoding
-                               (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
-                                 |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
-                               # Assume the rest is data
-                               ,.*
-                               $
-                               ''',
-                              re.VERBOSE)
-
-
-class HTMLSanitizerMixin(object):
-    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
-
-    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
-                           'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
-                           'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
-                           'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
-                           'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
-                           'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
-                           'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
-                           'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
-                           'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
-                           'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
-                           'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
-                           'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
-                           'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
-
-    mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
-                       'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
-                       'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
-                       'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
-                       'munderover', 'none']
-
-    svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
-                    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
-                    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
-                    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
-                    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
-                    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
-
-    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
-                             'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
-                             'background', 'balance', 'bgcolor', 'bgproperties', 'border',
-                             'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
-                             'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
-                             'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
-                             'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
-                             'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
-                             'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
-                             'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
-                             'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
-                             'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
-                             'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
-                             'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
-                             'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
-                             'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
-                             'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
-                             'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
-                             'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
-                             'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
-                             'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
-                             'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
-                             'width', 'wrap', 'xml:lang']
-
-    mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
-                         'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
-                         'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
-                         'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
-                         'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
-                         'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
-                         'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
-                         'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
-                         'xlink:type', 'xmlns', 'xmlns:xlink']
-
-    svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
-                      'arabic-form', 'ascent', 'attributeName', 'attributeType',
-                      'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
-                      'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
-                      'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
-                      'fill-opacity', 'fill-rule', 'font-family', 'font-size',
-                      'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
-                      'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
-                      'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
-                      'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
-                      'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
-                      'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
-                      'opacity', 'orient', 'origin', 'overline-position',
-                      'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
-                      'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
-                      'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
-                      'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
-                      'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
-                      'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
-                      'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
-                      'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
-                      'transform', 'type', 'u1', 'u2', 'underline-position',
-                      'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
-                      'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
-                      'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
-                      'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
-                      'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
-                      'y1', 'y2', 'zoomAndPan']
-
-    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background', 'datasrc',
-                       'dynsrc', 'lowsrc', 'ping', 'poster', 'xlink:href', 'xml:base']
-
-    svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
-                               'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
-                               'mask', 'stroke']
-
-    svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
-                            'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
-                            'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
-                            'set', 'use']
-
-    acceptable_css_properties = ['azimuth', 'background-color',
-                                 'border-bottom-color', 'border-collapse', 'border-color',
-                                 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
-                                 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
-                                 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
-                                 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
-                                 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
-                                 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
-                                 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
-                                 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
-                                 'white-space', 'width']
-
-    acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
-                               'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
-                               'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
-                               'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
-                               'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
-                               'transparent', 'underline', 'white', 'yellow']
-
-    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
-                                 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
-                                 'stroke-opacity']
-
-    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
-                            'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
-                            'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
-                            'ssh', 'sftp', 'rtsp', 'afs', 'data']
-
-    acceptable_content_types = ['image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain']
-
-    # subclasses may define their own versions of these constants
-    allowed_elements = acceptable_elements + mathml_elements + svg_elements
-    allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
-    allowed_css_properties = acceptable_css_properties
-    allowed_css_keywords = acceptable_css_keywords
-    allowed_svg_properties = acceptable_svg_properties
-    allowed_protocols = acceptable_protocols
-    allowed_content_types = acceptable_content_types
-
-    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
-    # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
-    # attributes are parsed, and a restricted set, # specified by
-    # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
-    # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
-    # in ALLOWED_PROTOCOLS are allowed.
-    #
-    #   sanitize_html('<script> do_nasty_stuff() </script>')
-    #    => &lt;script> do_nasty_stuff() &lt;/script>
-    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
-    #    => <a>Click here for $100</a>
-    def sanitize_token(self, token):
-
-        # accommodate filters which use token_type differently
-        token_type = token["type"]
-        if token_type in list(tokenTypes.keys()):
-            token_type = tokenTypes[token_type]
-
-        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
-                          tokenTypes["EmptyTag"]):
-            if token["name"] in self.allowed_elements:
-                return self.allowed_token(token, token_type)
-            else:
-                return self.disallowed_token(token, token_type)
-        elif token_type == tokenTypes["Comment"]:
-            pass
-        else:
-            return token
-
-    def allowed_token(self, token, token_type):
-        if "data" in token:
-            attrs = dict([(name, val) for name, val in
-                          token["data"][::-1]
-                          if name in self.allowed_attributes])
-            for attr in self.attr_val_is_uri:
-                if attr not in attrs:
-                    continue
-                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
-                                       unescape(attrs[attr])).lower()
-                # remove replacement characters from unescaped characters
-                val_unescaped = val_unescaped.replace("\ufffd", "")
-                try:
-                    uri = urlparse.urlparse(val_unescaped)
-                except ValueError:
-                    uri = None
-                    del attrs[attr]
-                if uri and uri.scheme:
-                    if uri.scheme not in self.allowed_protocols:
-                        del attrs[attr]
-                    if uri.scheme == 'data':
-                        m = content_type_rgx.match(uri.path)
-                        if not m:
-                            del attrs[attr]
-                        elif m.group('content_type') not in self.allowed_content_types:
-                            del attrs[attr]
-
-            for attr in self.svg_attr_val_allows_ref:
-                if attr in attrs:
-                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
-                                         ' ',
-                                         unescape(attrs[attr]))
-            if (token["name"] in self.svg_allow_local_href and
-                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
-                                                    attrs['xlink:href'])):
-                del attrs['xlink:href']
-            if 'style' in attrs:
-                attrs['style'] = self.sanitize_css(attrs['style'])
-            token["data"] = [[name, val] for name, val in list(attrs.items())]
-        return token
-
-    def disallowed_token(self, token, token_type):
-        if token_type == tokenTypes["EndTag"]:
-            token["data"] = "</%s>" % token["name"]
-        elif token["data"]:
-            attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
-            token["data"] = "<%s%s>" % (token["name"], attrs)
-        else:
-            token["data"] = "<%s>" % token["name"]
-        if token.get("selfClosing"):
-            token["data"] = token["data"][:-1] + "/>"
-
-        if token["type"] in list(tokenTypes.keys()):
-            token["type"] = "Characters"
-        else:
-            token["type"] = tokenTypes["Characters"]
-
-        del token["name"]
-        return token
-
-    def sanitize_css(self, style):
-        # disallow urls
-        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
-
-        # gauntlet
-        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
-            return ''
-        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
-            return ''
-
-        clean = []
-        for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
-            if not value:
-                continue
-            if prop.lower() in self.allowed_css_properties:
-                clean.append(prop + ': ' + value + ';')
-            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
-                                                'padding']:
-                for keyword in value.split():
-                    if keyword not in self.acceptable_css_keywords and \
-                            not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
-                        break
-                else:
-                    clean.append(prop + ': ' + value + ';')
-            elif prop.lower() in self.allowed_svg_properties:
-                clean.append(prop + ': ' + value + ';')
-
-        return ' '.join(clean)
-
-
-class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
-    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
-                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
-        # Change case matching defaults as we only output lowercase html anyway
-        # This solution doesn't seem ideal...
-        HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
-                               lowercaseElementName, lowercaseAttrName, parser=parser)
-
-    def __iter__(self):
-        for token in HTMLTokenizer.__iter__(self):
-            token = self.sanitize_token(token)
-            if token:
-                yield token
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer.py
similarity index 68%
rename from html5lib/serializer/htmlserializer.py
rename to html5lib/serializer.py
index be4d6344..103dd206 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer.py
@@ -1,79 +1,87 @@
 from __future__ import absolute_import, division, unicode_literals
 from six import text_type
 
-try:
-    from functools import reduce
-except ImportError:
-    pass
+import re
+
+from codecs import register_error, xmlcharrefreplace_errors
 
-from ..constants import voidElements, booleanAttributes, spaceCharacters
-from ..constants import rcdataElements, entities, xmlEntities
-from .. import utils
+from .constants import voidElements, booleanAttributes, spaceCharacters
+from .constants import rcdataElements, entities, xmlEntities
+from . import treewalkers, _utils
 from xml.sax.saxutils import escape
 
-spaceCharacters = "".join(spaceCharacters)
-
-try:
-    from codecs import register_error, xmlcharrefreplace_errors
-except ImportError:
-    unicode_encode_errors = "strict"
-else:
-    unicode_encode_errors = "htmlentityreplace"
-
-    encode_entity_map = {}
-    is_ucs4 = len("\U0010FFFF") == 1
-    for k, v in list(entities.items()):
-        # skip multi-character entities
-        if ((is_ucs4 and len(v) > 1) or
-                (not is_ucs4 and len(v) > 2)):
-            continue
-        if v != "&":
-            if len(v) == 2:
-                v = utils.surrogatePairToCodepoint(v)
-            else:
-                v = ord(v)
-            if v not in encode_entity_map or k.islower():
-                # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
-                encode_entity_map[v] = k
-
-    def htmlentityreplace_errors(exc):
-        if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
-            res = []
-            codepoints = []
-            skip = False
-            for i, c in enumerate(exc.object[exc.start:exc.end]):
-                if skip:
-                    skip = False
-                    continue
-                index = i + exc.start
-                if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
-                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
-                    skip = True
-                else:
-                    codepoint = ord(c)
-                codepoints.append(codepoint)
-            for cp in codepoints:
-                e = encode_entity_map.get(cp)
-                if e:
-                    res.append("&")
-                    res.append(e)
-                    if not e.endswith(";"):
-                        res.append(";")
-                else:
-                    res.append("&#x%s;" % (hex(cp)[2:]))
-            return ("".join(res), exc.end)
+_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
+_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
+_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
+                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
+                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
+                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
+                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
+                                   "\u3000]")
+
+
+_encode_entity_map = {}
+_is_ucs4 = len("\U0010FFFF") == 1
+for k, v in list(entities.items()):
+    # skip multi-character entities
+    if ((_is_ucs4 and len(v) > 1) or
+            (not _is_ucs4 and len(v) > 2)):
+        continue
+    if v != "&":
+        if len(v) == 2:
+            v = _utils.surrogatePairToCodepoint(v)
         else:
-            return xmlcharrefreplace_errors(exc)
+            v = ord(v)
+        if v not in _encode_entity_map or k.islower():
+            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
+            _encode_entity_map[v] = k
+
+
+def htmlentityreplace_errors(exc):
+    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
+        res = []
+        codepoints = []
+        skip = False
+        for i, c in enumerate(exc.object[exc.start:exc.end]):
+            if skip:
+                skip = False
+                continue
+            index = i + exc.start
+            if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+                codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
+                skip = True
+            else:
+                codepoint = ord(c)
+            codepoints.append(codepoint)
+        for cp in codepoints:
+            e = _encode_entity_map.get(cp)
+            if e:
+                res.append("&")
+                res.append(e)
+                if not e.endswith(";"):
+                    res.append(";")
+            else:
+                res.append("&#x%s;" % (hex(cp)[2:]))
+        return ("".join(res), exc.end)
+    else:
+        return xmlcharrefreplace_errors(exc)
+
+register_error("htmlentityreplace", htmlentityreplace_errors)
 
-    register_error(unicode_encode_errors, htmlentityreplace_errors)
 
-    del register_error
+def serialize(input, tree="etree", encoding=None, **serializer_opts):
+    # XXX: Should we cache this?
+    walker = treewalkers.getTreeWalker(tree)
+    s = HTMLSerializer(**serializer_opts)
+    return s.render(walker(input), encoding)
 
 
 class HTMLSerializer(object):
 
     # attribute quoting options
-    quote_attr_values = False
+    quote_attr_values = "legacy"  # be secure by default
     quote_char = '"'
     use_best_quote_char = True
 
@@ -109,9 +117,9 @@ def __init__(self, **kwargs):
         inject_meta_charset=True|False
           Whether it insert a meta element to define the character set of the
           document.
-        quote_attr_values=True|False
+        quote_attr_values="legacy"|"spec"|"always"
           Whether to quote attribute values that don't require quoting
-          per HTML5 parsing rules.
+          per legacy browser behaviour, when required by the standard, or always.
         quote_char=u'"'|u"'"
           Use given quote character for attribute quoting. Default is to
           use double quote unless attribute value contains a double quote,
@@ -147,6 +155,9 @@ def __init__(self, **kwargs):
 
         .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
         """
+        unexpected_args = frozenset(kwargs) - frozenset(self.options)
+        if len(unexpected_args) > 0:
+            raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
         if 'quote_char' in kwargs:
             self.use_best_quote_char = False
         for attr in self.options:
@@ -157,7 +168,7 @@ def __init__(self, **kwargs):
     def encode(self, string):
         assert(isinstance(string, text_type))
         if self.encoding:
-            return string.encode(self.encoding, unicode_encode_errors)
+            return string.encode(self.encoding, "htmlentityreplace")
         else:
             return string
 
@@ -169,28 +180,30 @@ def encodeStrict(self, string):
             return string
 
     def serialize(self, treewalker, encoding=None):
+        # pylint:disable=too-many-nested-blocks
         self.encoding = encoding
         in_cdata = False
         self.errors = []
 
         if encoding and self.inject_meta_charset:
-            from ..filters.inject_meta_charset import Filter
+            from .filters.inject_meta_charset import Filter
             treewalker = Filter(treewalker, encoding)
+        # Alphabetical attributes is here under the assumption that none of
+        # the later filters add or change order of attributes; it needs to be
+        # before the sanitizer so escaped elements come out correctly
+        if self.alphabetical_attributes:
+            from .filters.alphabeticalattributes import Filter
+            treewalker = Filter(treewalker)
         # WhitespaceFilter should be used before OptionalTagFilter
         # for maximum efficiently of this latter filter
         if self.strip_whitespace:
-            from ..filters.whitespace import Filter
+            from .filters.whitespace import Filter
             treewalker = Filter(treewalker)
         if self.sanitize:
-            from ..filters.sanitizer import Filter
+            from .filters.sanitizer import Filter
             treewalker = Filter(treewalker)
         if self.omit_optional_tags:
-            from ..filters.optionaltags import Filter
-            treewalker = Filter(treewalker)
-        # Alphabetical attributes must be last, as other filters
-        # could add attributes and alter the order
-        if self.alphabetical_attributes:
-            from ..filters.alphabeticalattributes import Filter
+            from .filters.optionaltags import Filter
             treewalker = Filter(treewalker)
 
         for token in treewalker:
@@ -229,7 +242,7 @@ def serialize(self, treewalker, encoding=None):
                     in_cdata = True
                 elif in_cdata:
                     self.serializeError("Unexpected child element of a CDATA element")
-                for (attr_namespace, attr_name), attr_value in token["data"].items():
+                for (_, attr_name), attr_value in token["data"].items():
                     # TODO: Add namespace support here
                     k = attr_name
                     v = attr_value
@@ -237,14 +250,18 @@ def serialize(self, treewalker, encoding=None):
 
                     yield self.encodeStrict(k)
                     if not self.minimize_boolean_attributes or \
-                        (k not in booleanAttributes.get(name, tuple())
-                         and k not in booleanAttributes.get("", tuple())):
+                        (k not in booleanAttributes.get(name, tuple()) and
+                         k not in booleanAttributes.get("", tuple())):
                         yield self.encodeStrict("=")
-                        if self.quote_attr_values or not v:
+                        if self.quote_attr_values == "always" or len(v) == 0:
                             quote_attr = True
+                        elif self.quote_attr_values == "spec":
+                            quote_attr = _quoteAttributeSpec.search(v) is not None
+                        elif self.quote_attr_values == "legacy":
+                            quote_attr = _quoteAttributeLegacy.search(v) is not None
                         else:
-                            quote_attr = reduce(lambda x, y: x or (y in v),
-                                                spaceCharacters + ">\"'=", False)
+                            raise ValueError("quote_attr_values must be one of: "
+                                             "'always', 'spec', or 'legacy'")
                         v = v.replace("&", "&amp;")
                         if self.escape_lt_in_attrs:
                             v = v.replace("<", "&lt;")
@@ -312,6 +329,6 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
             raise SerializeError
 
 
-def SerializeError(Exception):
+class SerializeError(Exception):
     """Error in serialized tree"""
     pass
diff --git a/html5lib/serializer/__init__.py b/html5lib/serializer/__init__.py
deleted file mode 100644
index 8380839a..00000000
--- a/html5lib/serializer/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .. import treewalkers
-
-from .htmlserializer import HTMLSerializer
-
-
-def serialize(input, tree="etree", format="html", encoding=None,
-              **serializer_opts):
-    # XXX: Should we cache this?
-    walker = treewalkers.getTreeWalker(tree)
-    if format == "html":
-        s = HTMLSerializer(**serializer_opts)
-    else:
-        raise ValueError("type must be html")
-    return s.render(walker(input), encoding)
diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py
new file mode 100644
index 00000000..ce93eff6
--- /dev/null
+++ b/html5lib/tests/conftest.py
@@ -0,0 +1,86 @@
+import os.path
+
+import pkg_resources
+import pytest
+
+from .tree_construction import TreeConstructionFile
+from .tokenizer import TokenizerFile
+from .sanitizer import SanitizerFile
+
+_dir = os.path.abspath(os.path.dirname(__file__))
+_root = os.path.join(_dir, "..", "..")
+_testdata = os.path.join(_dir, "testdata")
+_tree_construction = os.path.join(_testdata, "tree-construction")
+_tokenizer = os.path.join(_testdata, "tokenizer")
+_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
+
+
+def pytest_configure(config):
+    msgs = []
+
+    if not os.path.exists(_testdata):
+        msg = "testdata not available! "
+        if os.path.exists(os.path.join(_root, ".git")):
+            msg += ("Please run git submodule update --init --recursive " +
+                    "and then run tests again.")
+        else:
+            msg += ("The testdata doesn't appear to be included with this package, " +
+                    "so finding the right version will be hard. :(")
+        msgs.append(msg)
+
+    if config.option.update_xfail:
+        # Check for optional requirements
+        req_file = os.path.join(_root, "requirements-optional.txt")
+        if os.path.exists(req_file):
+            with open(req_file, "r") as fp:
+                for line in fp:
+                    if (line.strip() and
+                        not (line.startswith("-r") or
+                             line.startswith("#"))):
+                        if ";" in line:
+                            spec, marker = line.strip().split(";", 1)
+                        else:
+                            spec, marker = line.strip(), None
+                        req = pkg_resources.Requirement.parse(spec)
+                        if marker and not pkg_resources.evaluate_marker(marker):
+                            msgs.append("%s not available in this environment" % spec)
+                        else:
+                            try:
+                                installed = pkg_resources.working_set.find(req)
+                            except pkg_resources.VersionConflict:
+                                msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
+                            else:
+                                if not installed:
+                                    msgs.append("Need %s" % spec)
+
+        # Check cElementTree
+        import xml.etree.ElementTree as ElementTree
+
+        try:
+            import xml.etree.cElementTree as cElementTree
+        except ImportError:
+            msgs.append("cElementTree unable to be imported")
+        else:
+            if cElementTree.Element is ElementTree.Element:
+                msgs.append("cElementTree is just an alias for ElementTree")
+
+    if msgs:
+        pytest.exit("\n".join(msgs))
+
+
+def pytest_collect_file(path, parent):
+    dir = os.path.abspath(path.dirname)
+    dir_and_parents = set()
+    while dir not in dir_and_parents:
+        dir_and_parents.add(dir)
+        dir = os.path.dirname(dir)
+
+    if _tree_construction in dir_and_parents:
+        if path.ext == ".dat":
+            return TreeConstructionFile(path, parent)
+    elif _tokenizer in dir_and_parents:
+        if path.ext == ".test":
+            return TokenizerFile(path, parent)
+    elif _sanitizer_testdata in dir_and_parents:
+        if path.ext == ".dat":
+            return SanitizerFile(path, parent)
diff --git a/html5lib/tests/mockParser.py b/html5lib/tests/mockParser.py
deleted file mode 100644
index ef31527e..00000000
--- a/html5lib/tests/mockParser.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import sys
-import os
-
-if __name__ == '__main__':
-    # Allow us to import from the src directory
-    os.chdir(os.path.split(os.path.abspath(__file__))[0])
-    sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
-
-from html5lib.tokenizer import HTMLTokenizer
-
-
-class HTMLParser(object):
-    """ Fake parser to test tokenizer output """
-    def parse(self, stream, output=True):
-        tokenizer = HTMLTokenizer(stream)
-        for token in tokenizer:
-            if output:
-                print(token)
-
-if __name__ == "__main__":
-    x = HTMLParser()
-    if len(sys.argv) > 1:
-        if len(sys.argv) > 2:
-            import hotshot
-            import hotshot.stats
-            prof = hotshot.Profile('stats.prof')
-            prof.runcall(x.parse, sys.argv[1], False)
-            prof.close()
-            stats = hotshot.stats.load('stats.prof')
-            stats.strip_dirs()
-            stats.sort_stats('time')
-            stats.print_stats()
-        else:
-            x.parse(sys.argv[1])
-    else:
-        print("""Usage: python mockParser.py filename [stats]
-        If stats is specified the hotshots profiler will run and output the
-        stats instead.
-        """)
diff --git a/html5lib/tests/performance/concatenation.py b/html5lib/tests/performance/concatenation.py
deleted file mode 100644
index a1465036..00000000
--- a/html5lib/tests/performance/concatenation.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-def f1():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x += y + z
-
-
-def f2():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x = x + y + z
-
-
-def f3():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x = "".join((x, y, z))
-
-
-def f4():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x = "%s%s%s" % (x, y, z)
-
-import timeit
-for x in range(4):
-    statement = "f%s" % (x + 1)
-    t = timeit.Timer(statement, "from __main__ import " + statement)
-    r = t.repeat(3, 1000000)
-    print(r, min(r))
diff --git a/html5lib/tests/sanitizer-testdata/tests1.dat b/html5lib/tests/sanitizer-testdata/tests1.dat
new file mode 100644
index 00000000..74e88336
--- /dev/null
+++ b/html5lib/tests/sanitizer-testdata/tests1.dat
@@ -0,0 +1,433 @@
+[
+  {
+    "name": "IE_Comments",
+    "input": "",
+    "output": ""
+  },
+
+  {
+    "name": "IE_Comments_2",
+    "input": "",
+    "output": "<script>alert('XSS');</script>"
+  },
+
+  {
+    "name": "allow_colons_in_path_component",
+    "input": "foo",
+    "output": "foo"
+  },
+
+  {
+    "name": "background_attribute",
+    "input": "
", + "output": "
" + }, + + { + "name": "bgsound", + "input": "", + "output": "<bgsound src=\"javascript:alert('XSS');\"></bgsound>" + }, + + { + "name": "div_background_image_unicode_encoded", + "input": "
foo
", + "output": "
foo
" + }, + + { + "name": "div_expression", + "input": "
foo
", + "output": "
foo
" + }, + + { + "name": "double_open_angle_brackets", + "input": "", + "output": "" + }, + + { + "name": "img_dynsrc_lowsrc", + "input": "", + "output": "" + }, + + { + "name": "img_vbscript", + "input": "", + "output": "" + }, + + { + "name": "input_image", + "input": "", + "output": "" + }, + + { + "name": "link_stylesheets", + "input": "", + "output": "<link href=\"javascript:alert('XSS');\" rel=\"stylesheet\">" + }, + + { + "name": "link_stylesheets_2", + "input": "", + "output": "<link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\">" + }, + + { + "name": "list_style_image", + "input": "
  • foo
  • ", + "output": "
  • foo
  • " + }, + + { + "name": "no_closing_script_tags", + "input": "", + "output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>" + }, + + { + "name": "non_alpha_non_digit_2", + "input": "foo", + "output": "foo" + }, + + { + "name": "non_alpha_non_digit_3", + "input": "", + "output": "" + }, + + { + "name": "non_alpha_non_digit_II", + "input": "foo", + "output": "foo" + }, + + { + "name": "non_alpha_non_digit_III", + "input": "foo", + "output": "foo" + }, + + { + "name": "platypus", + "input": "never trust your upstream platypus", + "output": "never trust your upstream platypus" + }, + + { + "name": "protocol_resolution_in_script_tag", + "input": "", + "output": "<script src=\"//ha.ckers.org/.j\"></script>" + }, + + { + "name": "should_allow_anchors", + "input": "", + "output": "<script>baz</script>" + }, + + { + "name": "should_allow_image_alt_attribute", + "input": "foo", + "output": "foo" + }, + + { + "name": "should_allow_image_height_attribute", + "input": "", + "output": "" + }, + + { + "name": "should_allow_image_src_attribute", + "input": "", + "output": "" + }, + + { + "name": "should_allow_image_width_attribute", + "input": "", + "output": "" + }, + + { + "name": "should_handle_blank_text", + "input": "", + "output": "" + }, + + { + "name": "should_handle_malformed_image_tags", + "input": "\">", + "output": "<script>alert(\"XSS\")</script>\">" + }, + + { + "name": "should_handle_non_html", + "input": "abc", + "output": "abc" + }, + + { + "name": "should_not_fall_for_ridiculous_hack", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_0", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_1", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_10", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_11", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_12", + 
"input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_13", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_14", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_2", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_3", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_4", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_5", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_6", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_7", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_8", + "input": "", + "output": "" + }, + + { + "name": "should_not_fall_for_xss_image_hack_9", + "input": "", + "output": "" + }, + + { + "name": "should_sanitize_half_open_scripts", + "input": "", + "output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>" + }, + + { + "name": "should_sanitize_script_tag_with_multiple_open_brackets", + "input": "<", + "output": "<<script>alert(\"XSS\");//<</script>" + }, + + { + "name": "should_sanitize_script_tag_with_multiple_open_brackets_2", + "input": "