From 1be9532f79fd7744be0945c4ab42d2f5b41e4e73 Mon Sep 17 00:00:00 2001 From: Ritwik Gupta Date: Mon, 24 Nov 2014 16:23:01 -0500 Subject: [PATCH 001/179] Added iframe seamless boolean attribute --- html5lib/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/html5lib/constants.py b/html5lib/constants.py index e7089846..659f2b5e 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -535,6 +535,7 @@ "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")), "select": frozenset(("disabled", "readonly", "autofocus", "multiple")), "output": frozenset(("disabled", "readonly")), + "iframe": frozenset(("seamless")), } # entitiesWindows1252 has to be _ordered_ and needs to have an index. It From 4dfe3cd9f97ce51c53463d633308f4a3fe6ad9e6 Mon Sep 17 00:00:00 2001 From: Ritwik Gupta Date: Mon, 24 Nov 2014 16:25:04 -0500 Subject: [PATCH 002/179] Update CHANGES.rst --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1431b3c9..89e48f94 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,7 +6,7 @@ Change Log Released on XXX, 2014 -* XXX +* Fix #XXX: added the seamless attribute for iframes. 0.999 From 7fd79e31e083ab75305b3e837ea9aa8c9b4675ff Mon Sep 17 00:00:00 2001 From: Ritwik Gupta Date: Mon, 24 Nov 2014 16:25:28 -0500 Subject: [PATCH 003/179] Update AUTHORS.rst --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 4148a6ed..787c3b94 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -32,3 +32,4 @@ Patches and suggestions - Juan Carlos Garcia Segovia - Mike West - Marc DM +- Ritwik Gupta From 563dc298ea43021f9a9306fc7da3734ea5d9d8ec Mon Sep 17 00:00:00 2001 From: Adam Chainz Date: Sun, 29 May 2016 21:21:58 +0100 Subject: [PATCH 004/179] Convert readthedocs link for their .org -> .io migration for hosted projects (#261) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per [their blog post of the 27th April](https://blog.readthedocs.com/securing-subdomains/) ‘Securing subdomains’: > Starting today, Read the Docs will start hosting projects from subdomains on the domain readthedocs.io, instead of on readthedocs.org. This change addresses some security concerns around site cookies while hosting user generated data on the same domain as our dashboard. Test Plan: Manually visited all the links I’ve modified. --- CHANGES.rst | 2 +- README.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1f87d9ab..cdf21bc4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -219,7 +219,7 @@ Released on May 17, 2013 * Test harness has been improved and now depends on ``nose``. -* Documentation updated and moved to http://html5lib.readthedocs.org/. +* Documentation updated and moved to https://html5lib.readthedocs.io/. 0.95 diff --git a/README.rst b/README.rst index 47eb90d3..6859ed30 100644 --- a/README.rst +++ b/README.rst @@ -84,7 +84,7 @@ format: parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom")) minidom_document = parser.parse("

Hello World!") -More documentation is available at http://html5lib.readthedocs.org/. +More documentation is available at https://html5lib.readthedocs.io/. Installation From 1ff3c802503388f39ce658decfa41f517cfd28f2 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 6 Jun 2016 21:57:48 +0100 Subject: [PATCH 005/179] Fix #239: make elementInScope consider namespaces for target (#262) r=nobody! --- .pytest.expect | 45 ---------------------------------- html5lib/treebuilders/_base.py | 9 +++++-- 2 files changed, 7 insertions(+), 47 deletions(-) diff --git a/.pytest.expect b/.pytest.expect index 8bfcf4b7..f03e18f8 100644 --- a/.pytest.expect +++ b/.pytest.expect @@ -208,51 +208,6 @@ u'html5lib/tests/testdata/tree-construction/isindex.dat::3::cElementTree::parser u'html5lib/tests/testdata/tree-construction/isindex.dat::3::cElementTree::parser::void-namespace': FAIL u'html5lib/tests/testdata/tree-construction/isindex.dat::3::lxml::parser::namespaced': FAIL u'html5lib/tests/testdata/tree-construction/isindex.dat::3::lxml::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::DOM::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::DOM::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::DOM::treewalker': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::ElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::ElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::ElementTree::treewalker': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::cElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::cElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::cElementTree::treewalker': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::genshi::treewalker': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::lxml::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::lxml::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::0::lxml::treewalker': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::DOM::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::DOM::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::ElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::ElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::cElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::cElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::lxml::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::1::lxml::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::DOM::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::DOM::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::ElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::ElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::cElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::cElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::lxml::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::2::lxml::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::DOM::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::DOM::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::ElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::ElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::cElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::cElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::lxml::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::3::lxml::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::DOM::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::DOM::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::ElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::ElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::cElementTree::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::cElementTree::parser::void-namespace': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::lxml::parser::namespaced': FAIL -u'html5lib/tests/testdata/tree-construction/math.dat::4::lxml::parser::void-namespace': FAIL u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::DOM::parser::namespaced': FAIL u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::DOM::parser::void-namespace': FAIL u'html5lib/tests/testdata/tree-construction/menuitem-element.dat::3::ElementTree::parser::namespaced': FAIL diff --git a/html5lib/treebuilders/_base.py b/html5lib/treebuilders/_base.py index 900a724c..a4b2792a 100644 --- a/html5lib/treebuilders/_base.py +++ b/html5lib/treebuilders/_base.py @@ -167,12 +167,17 @@ def elementInScope(self, target, variant=None): # If we pass a node in we match that. if we pass a string # match any node with that name exactNode = hasattr(target, "nameTuple") + if not exactNode: + if isinstance(target, text_type): + target = (namespaces["html"], target) + assert isinstance(target, tuple) listElements, invert = listElementsMap[variant] for node in reversed(self.openElements): - if (node.name == target and not exactNode or - node == target and exactNode): + if exactNode and node == target: + return True + elif not exactNode and node.nameTuple == target: return True elif (invert ^ (node.nameTuple in listElements)): return False From 5197f557afe178b9ff7ec9c37a364c73ce00194e Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 6 Jun 2016 22:40:46 +0100 Subject: [PATCH 006/179] Fix #132: add test for #115, single character fragments (#264) r=nobody! --- html5lib/tests/test_treewalkers.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 81ed2778..67fc89e5 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,5 +1,7 @@ from __future__ import absolute_import, division, unicode_literals +import itertools + import pytest try: @@ -100,6 +102,24 @@ def test_treewalker_six_mix(): yield runTreewalkerEditTest, intext, expected, attrs, tree +@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) +def test_fragment_single_char(tree, char): + expected = [ + {'data': char, 'type': 'Characters'} + ] + + treeName, treeClass = tree + if treeClass is None: + pytest.skip("Treebuilder not loaded") + + parser = html5parser.HTMLParser(tree=treeClass["builder"]) + document = parser.parseFragment(char) + document = treeClass.get("adapter", lambda x: x)(document) + output = Lint(treeClass["walker"](document)) + + assert list(output) == expected + + @pytest.mark.skipif(treeTypes["lxml"] is None, reason="lxml not importable") def test_lxml_xml(): expected = [ From d5b0dc26ca5f3f89db0a7256894d322cd3180df7 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 6 Jun 2016 17:16:28 +0100 Subject: [PATCH 007/179] Make sure setup.py works regardless of cwd --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4d5f1523..0d867279 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ long_description = readme_file.read() + '\n' + changes_file.read() version = None -with open(join("html5lib", "__init__.py"), "rb") as init_file: +with open(join(here, "html5lib", "__init__.py"), "rb") as init_file: t = ast.parse(init_file.read(), filename="__init__.py", mode="exec") assert isinstance(t, ast.Module) assignments = filter(lambda x: isinstance(x, ast.Assign), t.body) From 21aabd6413c7429b1111265714b3de9c155bda24 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 6 Jun 2016 18:15:32 +0100 Subject: [PATCH 008/179] Try fixing #231 again: Return to using platform_python_implementation This makes us require setuptools>=18.5 --- requirements-install.sh | 2 +- requirements.txt | 1 + setup.py | 7 ++++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/requirements-install.sh b/requirements-install.sh index 8cab142d..9b28888a 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -6,7 +6,7 @@ if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then fi # Make sure we're running setuptools >= 18.5 -pip install -U pip setuptools +pip install -U pip setuptools>=18.5 pip install -U -r requirements-test.txt diff --git a/requirements.txt b/requirements.txt index 745993b9..92c09036 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ six webencodings ordereddict ; python_version < '2.7' +setuptools>=18.5 diff --git a/setup.py b/setup.py index 0d867279..7cc30534 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ install_requires=[ 'six', 'webencodings', + 'setuptools>=18.5' ], extras_require={ # A empty extra that only has a conditional marker will be @@ -60,8 +61,8 @@ # A conditional extra will only install these items when the extra is # requested and the condition matches. - "datrie:platform.python_implementation == 'CPython'": ["datrie"], - "lxml:platform.python_implementation == 'CPython'": ["lxml"], + "datrie:platform_python_implementation == 'CPython'": ["datrie"], + "lxml:platform_python_implementation == 'CPython'": ["lxml"], # Standard extras, will be installed when the extra is requested. "genshi": ["genshi"], @@ -72,6 +73,6 @@ # extra that will be installed whenever the condition matches and the # all extra is requested. "all": ["genshi", "chardet>=2.2"], - "all:platform.python_implementation == 'CPython'": ["datrie", "lxml"], + "all:platform_python_implementation == 'CPython'": ["datrie", "lxml"], }, ) From c562f28b03df132c6311a7c930635dd0e12abc5b Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Mon, 6 Jun 2016 18:21:52 +0100 Subject: [PATCH 009/179] Give a more useful error message than a SyntaxError on old setuptools --- setup.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7cc30534..7c419e2c 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,17 @@ +from __future__ import print_function + import ast import codecs +import sys from os.path import join, dirname -from setuptools import setup, find_packages +from setuptools import setup, find_packages, __version__ as setuptools_version +from pkg_resources import parse_version +if parse_version(setuptools_version) < parse_version("18.5"): + print("html5lib requires setuptools version 18.5 or above; " + "please upgrade before installing (you have %s)" % setuptools_version) + sys.exit(1) classifiers = [ 'Development Status :: 5 - Production/Stable', From 11bdb490d410931ed366a0f6161ed8144efde315 Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Mon, 27 Jun 2016 05:53:57 -0700 Subject: [PATCH 010/179] Make self closing col start tags not cause a parse error (#244) --- AUTHORS.rst | 1 + html5lib/html5parser.py | 9 ++++++--- html5lib/tests/test_parser2.py | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index fe9ae89b..c3820ef7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -41,3 +41,4 @@ Patches and suggestions - Jim Baker - Michael[tm] Smith - Marc Abramowitz +- Jon Dufresne diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index daee854c..3daf2995 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -171,8 +171,10 @@ def mainLoop(self): ParseErrorToken = tokenTypes["ParseError"] for token in self.normalizedTokens(): + prev_token = None new_token = token while new_token is not None: + prev_token = new_token currentNode = self.tree.openElements[-1] if self.tree.openElements else None currentNodeNamespace = currentNode.namespace if currentNode else None currentNodeName = currentNode.name if currentNode else None @@ -211,10 +213,10 @@ def mainLoop(self): elif type == DoctypeToken: new_token = phase.processDoctype(new_token) - if (type == StartTagToken and token["selfClosing"] and - not token["selfClosingAcknowledged"]): + if (type == StartTagToken and prev_token["selfClosing"] and + not prev_token["selfClosingAcknowledged"]): self.parseError("non-void-element-with-trailing-solidus", - {"name": token["name"]}) + {"name": prev_token["name"]}) # When the loop finishes it's EOF reprocess = True @@ -1933,6 +1935,7 @@ def processCharacters(self, token): def startTagCol(self, token): self.tree.insertElement(token) self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True def startTagOther(self, token): ignoreEndTag = self.ignoreEndTagColgroup() diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index b7a92fd7..21dc59d9 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -93,3 +93,9 @@ def test_debug_log(): def test_no_duplicate_clone(): frag = parseFragment("