From 6f4a282afff0307b0f2e51f15c4b45f4a7cce45a Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 12 Jan 2016 21:22:50 +0100
Subject: [PATCH 1/3] Remove the mockParser because I have no idea why we have
 it.

---
 html5lib/tests/mockParser.py | 41 ------------------------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 html5lib/tests/mockParser.py

diff --git a/html5lib/tests/mockParser.py b/html5lib/tests/mockParser.py
deleted file mode 100644
index ef31527e..00000000
--- a/html5lib/tests/mockParser.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import sys
-import os
-
-if __name__ == '__main__':
-    # Allow us to import from the src directory
-    os.chdir(os.path.split(os.path.abspath(__file__))[0])
-    sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
-
-from html5lib.tokenizer import HTMLTokenizer
-
-
-class HTMLParser(object):
-    """ Fake parser to test tokenizer output """
-    def parse(self, stream, output=True):
-        tokenizer = HTMLTokenizer(stream)
-        for token in tokenizer:
-            if output:
-                print(token)
-
-if __name__ == "__main__":
-    x = HTMLParser()
-    if len(sys.argv) > 1:
-        if len(sys.argv) > 2:
-            import hotshot
-            import hotshot.stats
-            prof = hotshot.Profile('stats.prof')
-            prof.runcall(x.parse, sys.argv[1], False)
-            prof.close()
-            stats = hotshot.stats.load('stats.prof')
-            stats.strip_dirs()
-            stats.sort_stats('time')
-            stats.print_stats()
-        else:
-            x.parse(sys.argv[1])
-    else:
-        print("""Usage: python mockParser.py filename [stats]
-        If stats is specified the hotshots profiler will run and output the
-        stats instead.
-        """)

From f28c5acb9901d22bed7587aa8d58d76e94965aec Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 12 Jan 2016 21:23:50 +0100
Subject: [PATCH 2/3] We don't need Python performance tests.

---
 html5lib/tests/performance/concatenation.py | 36 ---------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 html5lib/tests/performance/concatenation.py

diff --git a/html5lib/tests/performance/concatenation.py b/html5lib/tests/performance/concatenation.py
deleted file mode 100644
index a1465036..00000000
--- a/html5lib/tests/performance/concatenation.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-def f1():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x += y + z
-
-
-def f2():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x = x + y + z
-
-
-def f3():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x = "".join((x, y, z))
-
-
-def f4():
-    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    x = "%s%s%s" % (x, y, z)
-
-import timeit
-for x in range(4):
-    statement = "f%s" % (x + 1)
-    t = timeit.Timer(statement, "from __main__ import " + statement)
-    r = t.repeat(3, 1000000)
-    print(r, min(r))

From 5e90af858c175133c34ee548271bddb3ca5ef245 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 12 Jan 2016 21:29:23 +0100
Subject: [PATCH 3/3] Make pep8 1.7 happy.

---
 html5lib/html5parser.py               | 147 +++++++++++++-------------
 html5lib/serializer/htmlserializer.py |   4 +-
 html5lib/tests/test_serializer.py     |   3 +-
 html5lib/tests/test_tokenizer.py      |   4 +-
 html5lib/treebuilders/_base.py        |   4 +-
 html5lib/treebuilders/dom.py          |   4 +-
 html5lib/treewalkers/__init__.py      |   4 +-
 html5lib/treewalkers/_base.py         |   6 +-
 html5lib/treewalkers/genshistream.py  |   4 +-
 9 files changed, 89 insertions(+), 91 deletions(-)

diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index c2c30783..ae980c55 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -204,8 +204,8 @@ def mainLoop(self):
                     elif type == DoctypeToken:
                         new_token = phase.processDoctype(new_token)
 
-            if (type == StartTagToken and token["selfClosing"]
-                    and not token["selfClosingAcknowledged"]):
+            if (type == StartTagToken and token["selfClosing"] and
+                    not token["selfClosingAcknowledged"]):
                 self.parseError("non-void-element-with-trailing-solidus",
                                 {"name": token["name"]})
 
@@ -517,77 +517,76 @@ def processDoctype(self, token):
             if publicId != "":
                 publicId = publicId.translate(asciiUpper2Lower)
 
-            if (not correct or token["name"] != "html"
-                or publicId.startswith(
-                    ("+//silmaril//dtd html pro v0r11 19970101//",
-                     "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
-                     "-//as//dtd html 3.0 aswedit + extensions//",
-                     "-//ietf//dtd html 2.0 level 1//",
-                     "-//ietf//dtd html 2.0 level 2//",
-                     "-//ietf//dtd html 2.0 strict level 1//",
-                     "-//ietf//dtd html 2.0 strict level 2//",
-                     "-//ietf//dtd html 2.0 strict//",
-                     "-//ietf//dtd html 2.0//",
-                     "-//ietf//dtd html 2.1e//",
-                     "-//ietf//dtd html 3.0//",
-                     "-//ietf//dtd html 3.2 final//",
-                     "-//ietf//dtd html 3.2//",
-                     "-//ietf//dtd html 3//",
-                     "-//ietf//dtd html level 0//",
-                     "-//ietf//dtd html level 1//",
-                     "-//ietf//dtd html level 2//",
-                     "-//ietf//dtd html level 3//",
-                     "-//ietf//dtd html strict level 0//",
-                     "-//ietf//dtd html strict level 1//",
-                     "-//ietf//dtd html strict level 2//",
-                     "-//ietf//dtd html strict level 3//",
-                     "-//ietf//dtd html strict//",
-                     "-//ietf//dtd html//",
-                     "-//metrius//dtd metrius presentational//",
-                     "-//microsoft//dtd internet explorer 2.0 html strict//",
-                     "-//microsoft//dtd internet explorer 2.0 html//",
-                     "-//microsoft//dtd internet explorer 2.0 tables//",
-                     "-//microsoft//dtd internet explorer 3.0 html strict//",
-                     "-//microsoft//dtd internet explorer 3.0 html//",
-                     "-//microsoft//dtd internet explorer 3.0 tables//",
-                     "-//netscape comm. corp.//dtd html//",
-                     "-//netscape comm. corp.//dtd strict html//",
-                     "-//o'reilly and associates//dtd html 2.0//",
-                     "-//o'reilly and associates//dtd html extended 1.0//",
-                     "-//o'reilly and associates//dtd html extended relaxed 1.0//",
-                     "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
-                     "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
-                     "-//spyglass//dtd html 2.0 extended//",
-                     "-//sq//dtd html 2.0 hotmetal + extensions//",
-                     "-//sun microsystems corp.//dtd hotjava html//",
-                     "-//sun microsystems corp.//dtd hotjava strict html//",
-                     "-//w3c//dtd html 3 1995-03-24//",
-                     "-//w3c//dtd html 3.2 draft//",
-                     "-//w3c//dtd html 3.2 final//",
-                     "-//w3c//dtd html 3.2//",
-                     "-//w3c//dtd html 3.2s draft//",
-                     "-//w3c//dtd html 4.0 frameset//",
-                     "-//w3c//dtd html 4.0 transitional//",
-                     "-//w3c//dtd html experimental 19960712//",
-                     "-//w3c//dtd html experimental 970421//",
-                     "-//w3c//dtd w3 html//",
-                     "-//w3o//dtd w3 html 3.0//",
-                     "-//webtechs//dtd mozilla html 2.0//",
-                     "-//webtechs//dtd mozilla html//"))
-                or publicId in
-                    ("-//w3o//dtd w3 html strict 3.0//en//",
-                     "-/w3c/dtd html 4.0 transitional/en",
-                     "html")
-                or publicId.startswith(
-                    ("-//w3c//dtd html 4.01 frameset//",
-                     "-//w3c//dtd html 4.01 transitional//")) and
-                    systemId is None
-                    or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+            if (not correct or token["name"] != "html" or
+                    publicId.startswith(
+                        ("+//silmaril//dtd html pro v0r11 19970101//",
+                         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+                         "-//as//dtd html 3.0 aswedit + extensions//",
+                         "-//ietf//dtd html 2.0 level 1//",
+                         "-//ietf//dtd html 2.0 level 2//",
+                         "-//ietf//dtd html 2.0 strict level 1//",
+                         "-//ietf//dtd html 2.0 strict level 2//",
+                         "-//ietf//dtd html 2.0 strict//",
+                         "-//ietf//dtd html 2.0//",
+                         "-//ietf//dtd html 2.1e//",
+                         "-//ietf//dtd html 3.0//",
+                         "-//ietf//dtd html 3.2 final//",
+                         "-//ietf//dtd html 3.2//",
+                         "-//ietf//dtd html 3//",
+                         "-//ietf//dtd html level 0//",
+                         "-//ietf//dtd html level 1//",
+                         "-//ietf//dtd html level 2//",
+                         "-//ietf//dtd html level 3//",
+                         "-//ietf//dtd html strict level 0//",
+                         "-//ietf//dtd html strict level 1//",
+                         "-//ietf//dtd html strict level 2//",
+                         "-//ietf//dtd html strict level 3//",
+                         "-//ietf//dtd html strict//",
+                         "-//ietf//dtd html//",
+                         "-//metrius//dtd metrius presentational//",
+                         "-//microsoft//dtd internet explorer 2.0 html strict//",
+                         "-//microsoft//dtd internet explorer 2.0 html//",
+                         "-//microsoft//dtd internet explorer 2.0 tables//",
+                         "-//microsoft//dtd internet explorer 3.0 html strict//",
+                         "-//microsoft//dtd internet explorer 3.0 html//",
+                         "-//microsoft//dtd internet explorer 3.0 tables//",
+                         "-//netscape comm. corp.//dtd html//",
+                         "-//netscape comm. corp.//dtd strict html//",
+                         "-//o'reilly and associates//dtd html 2.0//",
+                         "-//o'reilly and associates//dtd html extended 1.0//",
+                         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+                         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+                         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+                         "-//spyglass//dtd html 2.0 extended//",
+                         "-//sq//dtd html 2.0 hotmetal + extensions//",
+                         "-//sun microsystems corp.//dtd hotjava html//",
+                         "-//sun microsystems corp.//dtd hotjava strict html//",
+                         "-//w3c//dtd html 3 1995-03-24//",
+                         "-//w3c//dtd html 3.2 draft//",
+                         "-//w3c//dtd html 3.2 final//",
+                         "-//w3c//dtd html 3.2//",
+                         "-//w3c//dtd html 3.2s draft//",
+                         "-//w3c//dtd html 4.0 frameset//",
+                         "-//w3c//dtd html 4.0 transitional//",
+                         "-//w3c//dtd html experimental 19960712//",
+                         "-//w3c//dtd html experimental 970421//",
+                         "-//w3c//dtd w3 html//",
+                         "-//w3o//dtd w3 html 3.0//",
+                         "-//webtechs//dtd mozilla html 2.0//",
+                         "-//webtechs//dtd mozilla html//")) or
+                    publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
+                                 "-/w3c/dtd html 4.0 transitional/en",
+                                 "html") or
+                    publicId.startswith(
+                        ("-//w3c//dtd html 4.01 frameset//",
+                         "-//w3c//dtd html 4.01 transitional//")) and
+                    systemId is None or
+                    systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
                 self.parser.compatMode = "quirks"
             elif (publicId.startswith(
                     ("-//w3c//dtd xhtml 1.0 frameset//",
-                     "-//w3c//dtd xhtml 1.0 transitional//"))
-                  or publicId.startswith(
+                     "-//w3c//dtd xhtml 1.0 transitional//")) or
+                  publicId.startswith(
                       ("-//w3c//dtd html 4.01 frameset//",
                        "-//w3c//dtd html 4.01 transitional//")) and
                   systemId is not None):
@@ -988,8 +987,8 @@ def processSpaceCharactersDropNewline(self, token):
             data = token["data"]
             self.processSpaceCharacters = self.processSpaceCharactersNonPre
             if (data.startswith("\n") and
-                self.tree.openElements[-1].name in ("pre", "listing", "textarea")
-                    and not self.tree.openElements[-1].hasContent()):
+                self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
+                    not self.tree.openElements[-1].hasContent()):
                 data = data[1:]
             if data:
                 self.tree.reconstructActiveFormattingElements()
@@ -1016,8 +1015,8 @@ def startTagProcessInHead(self, token):
 
         def startTagBody(self, token):
             self.parser.parseError("unexpected-start-tag", {"name": "body"})
-            if (len(self.tree.openElements) == 1
-                    or self.tree.openElements[1].name != "body"):
+            if (len(self.tree.openElements) == 1 or
+                    self.tree.openElements[1].name != "body"):
                 assert self.parser.innerHTML
             else:
                 self.parser.framesetOK = False
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
index be4d6344..b87d9a75 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer/htmlserializer.py
@@ -237,8 +237,8 @@ def serialize(self, treewalker, encoding=None):
 
                     yield self.encodeStrict(k)
                     if not self.minimize_boolean_attributes or \
-                        (k not in booleanAttributes.get(name, tuple())
-                         and k not in booleanAttributes.get("", tuple())):
+                        (k not in booleanAttributes.get(name, tuple()) and
+                         k not in booleanAttributes.get("", tuple())):
                         yield self.encodeStrict("=")
                         if self.quote_attr_values or not v:
                             quote_attr = True
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 3c37feff..af76075e 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -91,8 +91,7 @@ def runSerializerTest(input, expected, options):
     encoding = options.get("encoding", None)
 
     if encoding:
-        encode = lambda x: x.encode(encoding)
-        expected = list(map(encode, expected))
+        expected = list(map(lambda x: x.encode(encoding), expected))
 
     result = serialize_html(input, options)
     if len(expected) == 1:
diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index 823c6ea6..87e098f3 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -98,8 +98,8 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
     """
     checkSelfClosing = False
     for token in expectedTokens:
-        if (token[0] == "StartTag" and len(token) == 4
-                or token[0] == "EndTag" and len(token) == 3):
+        if (token[0] == "StartTag" and len(token) == 4 or
+                token[0] == "EndTag" and len(token) == 3):
             checkSelfClosing = True
             break
 
diff --git a/html5lib/treebuilders/_base.py b/html5lib/treebuilders/_base.py
index 8b97cc11..8196f591 100644
--- a/html5lib/treebuilders/_base.py
+++ b/html5lib/treebuilders/_base.py
@@ -353,8 +353,8 @@ def getTableMisnestedNodePosition(self):
     def generateImpliedEndTags(self, exclude=None):
         name = self.openElements[-1].name
         # XXX td, th and tr are not actually needed
-        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
-                and name != exclude):
+        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
+                name != exclude):
             self.openElements.pop()
             # XXX This is not entirely what the specification says. We should
             # investigate it more closely.
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 234233b7..8656244f 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -47,8 +47,8 @@ def __init__(self, element):
             _base.Node.__init__(self, element.nodeName)
             self.element = element
 
-        namespace = property(lambda self: hasattr(self.element, "namespaceURI")
-                             and self.element.namespaceURI or None)
+        namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
+                             self.element.namespaceURI or None)
 
         def appendChild(self, node):
             node.parent = self
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index 21f46b01..00ae2804 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -10,11 +10,11 @@
 
 from __future__ import absolute_import, division, unicode_literals
 
-__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree"]
-
 from .. import constants
 from ..utils import default_etree
 
+__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree"]
+
 treeWalkerCache = {}
 
 
diff --git a/html5lib/treewalkers/_base.py b/html5lib/treewalkers/_base.py
index 4e11cd02..e79a4357 100644
--- a/html5lib/treewalkers/_base.py
+++ b/html5lib/treewalkers/_base.py
@@ -1,11 +1,12 @@
 from __future__ import absolute_import, division, unicode_literals
 from six import text_type, string_types
 
+from xml.dom import Node
+from ..constants import voidElements, spaceCharacters
+
 __all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
            "TreeWalker", "NonRecursiveTreeWalker"]
 
-from xml.dom import Node
-
 DOCUMENT = Node.DOCUMENT_NODE
 DOCTYPE = Node.DOCUMENT_TYPE_NODE
 TEXT = Node.TEXT_NODE
@@ -14,7 +15,6 @@
 ENTITY = Node.ENTITY_NODE
 UNKNOWN = "<#UNKNOWN#>"
 
-from ..constants import voidElements, spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
 
diff --git a/html5lib/treewalkers/genshistream.py b/html5lib/treewalkers/genshistream.py
index f559c45d..24d33282 100644
--- a/html5lib/treewalkers/genshistream.py
+++ b/html5lib/treewalkers/genshistream.py
@@ -39,8 +39,8 @@ def tokens(self, event, next):
 
             if namespace == namespaces["html"] and name in voidElements:
                 for token in self.emptyTag(namespace, name, converted_attribs,
-                                           not next or next[0] != END
-                                           or next[1] != tag):
+                                           not next or next[0] != END or
+                                           next[1] != tag):
                     yield token
             else:
                 yield self.startTag(namespace, name, converted_attribs)