From 4a87368b71090f1432df6302f178c4babfcec93f Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss@me.com>
Date: Wed, 10 Jan 2024 15:13:40 +0100
Subject: [PATCH 1/3] GitHub Actions: python: ["3.8", "3.9", "3.10", "3.11",
 "3.12"] (#575)

---
 .appveyor.yml                       | 30 +++++++++++++++++------------
 .github/workflows/python-tox.yml    | 19 ++++++++----------
 html5lib/tests/tokenizer.py         |  4 +++-
 html5lib/tests/tree_construction.py |  4 +++-
 requirements-test.txt               |  3 ++-
 5 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/.appveyor.yml b/.appveyor.yml
index 7661aa63..e6f7bf48 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -1,23 +1,29 @@
-image: Visual Studio 2019
+# appveyor.yml - https://www.appveyor.com/docs/lang/python
+# https://www.appveyor.com/docs/windows-images-software/#visual-studio-2022
+---
+image: Visual Studio 2022
 environment:
-  global:
-    PATH: "C:\\Python27\\Scripts\\;%PATH%"
   matrix:
-    - TOXENV: py27-base
-    - TOXENV: py27-optional
-    - TOXENV: py35-base
-    - TOXENV: py35-optional
-    - TOXENV: py36-base
-    - TOXENV: py36-optional
+    - PY_PYTHON: 2.7
+      TOXENV: py27-base
+    - PY_PYTHON: 2.7
+      TOXENV: py27-optional
+    - PY_PYTHON: 3.7
+      TOXENV: py37-base
+    - PY_PYTHON: 3.7
+      TOXENV: py37-optional
 
 install:
   - git submodule update --init --recursive
-  - python -m pip install tox
+  - py --list
+  - py -VV
+  - py -m pip install --upgrade pip
+  - py -m pip install tox
 
 build: off
 
 test_script:
-  - tox
+  - py -m tox
 
 after_test:
-  - python debug-info.py
+  - py debug-info.py
diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml
index cfcc42e6..5ed83175 100644
--- a/.github/workflows/python-tox.yml
+++ b/.github/workflows/python-tox.yml
@@ -7,30 +7,27 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        # 2.7, 3.5, and 3.6 run on Windows via AppVeyor
-        python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        # 2.7 and 3.7 run on Windows via AppVeyor
+        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         os: [ubuntu-latest, windows-latest]
         deps: [base, optional]
         include:
           - python: "pypy-2.7"
             os: ubuntu-latest
             deps: base
-          - python: "pypy-3.8"
+          - python: "pypy-3.10"
             os: ubuntu-latest
             deps: base
-          - python: "2.7"
-            os: ubuntu-latest
-            deps: oldest
-          - python: "3.7"
+          - python: "3.8"
             os: ubuntu-latest
             deps: oldest
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           submodules: true
       - if: ${{ matrix.deps == 'base' }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python }}
           cache: pip
@@ -38,7 +35,7 @@ jobs:
             requirements.txt
             requirements-test.txt
       - if: ${{ matrix.deps == 'optional' }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python }}
           cache: pip
@@ -47,7 +44,7 @@ jobs:
             requirements-optional.txt
             requirements-test.txt
       - if: ${{ matrix.deps == 'oldest' }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python }}
           cache: pip
diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
index cc9897a4..b49d2e6e 100644
--- a/html5lib/tests/tokenizer.py
+++ b/html5lib/tests/tokenizer.py
@@ -246,7 +246,9 @@ def runtest(self):
     def repr_failure(self, excinfo):
         traceback = excinfo.traceback
         ntraceback = traceback.cut(path=__file__)
-        excinfo.traceback = ntraceback.filter()
+        pytest_ver = getattr(pytest, "version_tuple", ())
+        filter_args = (excinfo,) if pytest_ver >= (7, 4, 0) else ()
+        excinfo.traceback = ntraceback.filter(*filter_args)
 
         return excinfo.getrepr(funcargs=True,
                                showlocals=False,
diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py
index fb0657bf..363b48c2 100644
--- a/html5lib/tests/tree_construction.py
+++ b/html5lib/tests/tree_construction.py
@@ -135,7 +135,9 @@ def runtest(self):
     def repr_failure(self, excinfo):
         traceback = excinfo.traceback
         ntraceback = traceback.cut(path=__file__)
-        excinfo.traceback = ntraceback.filter()
+        pytest_ver = getattr(pytest, "version_tuple", ())
+        filter_args = (excinfo,) if pytest_ver >= (7, 4, 0) else ()
+        excinfo.traceback = ntraceback.filter(*filter_args)
 
         return excinfo.getrepr(funcargs=True,
                                showlocals=False,
diff --git a/requirements-test.txt b/requirements-test.txt
index 27866e59..39913ee4 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -3,8 +3,9 @@
 flake8==3.9.2 ; python_version < '3.6'
 flake8>=5.0.4; python_version >= '3.6'
 pytest>=4.6.10,<5 ; python_version < '3'
-pytest>=5.4.2,<7 ; python_version >= '3'
+pytest>=5.4.2,<8 ; python_version >= '3'
 coverage>=5.1,<6
 pytest-expect>=1.1.0,<2
 mock>=3.0.5,<4 ; python_version < '3.6'
 mock>=4.0.2,<5 ; python_version >= '3.6'
+setuptools; python_version >= '3.12'

From 82c2599585a6119e5afd26e58e754972c79f6734 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz93@gmail.com>
Date: Wed, 10 Jan 2024 13:31:09 -0500
Subject: [PATCH 2/3] tests: drop dependency on external mock module for newer
 python (#574)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #541

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
---
 html5lib/tests/test_meta.py | 5 ++++-
 requirements-test.txt       | 3 +--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py
index dd02dd7f..e02268aa 100644
--- a/html5lib/tests/test_meta.py
+++ b/html5lib/tests/test_meta.py
@@ -1,7 +1,10 @@
 from __future__ import absolute_import, division, unicode_literals
 
 import six
-from mock import Mock
+try:
+    from unittest.mock import Mock
+except ImportError:
+    from mock import Mock
 
 from . import support
 
diff --git a/requirements-test.txt b/requirements-test.txt
index 39913ee4..aca31f5e 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -6,6 +6,5 @@ pytest>=4.6.10,<5 ; python_version < '3'
 pytest>=5.4.2,<8 ; python_version >= '3'
 coverage>=5.1,<6
 pytest-expect>=1.1.0,<2
-mock>=3.0.5,<4 ; python_version < '3.6'
-mock>=4.0.2,<5 ; python_version >= '3.6'
+mock>=3.0.5,<4 ; python_version < '3.3'
 setuptools; python_version >= '3.12'

From fd4f032bc090d44fb11a84b352dad7cbee0a4745 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Langa?= <lukasz@langa.pl>
Date: Wed, 21 Feb 2024 16:31:38 +0100
Subject: [PATCH 3/3] Constant phases (#567)

* Get rid of getPhases

This added a fair bit of complexity, and notably made the Phase classes
dynamically generated.

However, by doing this, we no longer include "process the
token using the rules for" phases in the debug log.

Co-authored-by: Sam Sneddon <me@gsnedders.com>
---
 html5lib/_utils.py             |   12 -
 html5lib/html5parser.py        | 4349 ++++++++++++++++----------------
 html5lib/tests/test_parser2.py |    1 -
 3 files changed, 2172 insertions(+), 2190 deletions(-)

diff --git a/html5lib/_utils.py b/html5lib/_utils.py
index 9ea57942..7e23ee57 100644
--- a/html5lib/_utils.py
+++ b/html5lib/_utils.py
@@ -145,15 +145,3 @@ def moduleFactory(baseModule, *args, **kwargs):
             return mod
 
     return moduleFactory
-
-
-def memoize(func):
-    cache = {}
-
-    def wrapped(*args, **kwargs):
-        key = (tuple(args), tuple(kwargs.items()))
-        if key not in cache:
-            cache[key] = func(*args, **kwargs)
-        return cache[key]
-
-    return wrapped
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 4c2d4c75..b3c206d1 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -1,7 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
-from six import with_metaclass, viewkeys
-
-import types
+from six import viewkeys
 
 from . import _inputstream
 from . import _tokenizer
@@ -13,7 +11,7 @@
 from .constants import (
     spaceCharacters, asciiUpper2Lower,
     specialElements, headingElements, cdataElements, rcdataElements,
-    tokenTypes, tagTokenTypes,
+    tokenTypes,
     namespaces,
     htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
     adjustForeignAttributes as adjustForeignAttributesMap,
@@ -71,18 +69,6 @@ def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElemen
     return p.parseFragment(doc, container=container, **kwargs)
 
 
-def method_decorator_metaclass(function):
-    class Decorated(type):
-        def __new__(meta, classname, bases, classDict):
-            for attributeName, attribute in classDict.items():
-                if isinstance(attribute, types.FunctionType):
-                    attribute = function(attribute)
-
-                classDict[attributeName] = attribute
-            return type.__new__(meta, classname, bases, classDict)
-    return Decorated
-
-
 class HTMLParser(object):
     """HTML parser
 
@@ -112,6 +98,7 @@ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=Fa
 
         # Raise an exception on the first error encountered
         self.strict = strict
+        self.debug = debug
 
         if tree is None:
             tree = treebuilders.getTreeBuilder("etree")
@@ -122,7 +109,7 @@ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=Fa
         self.errors = []
 
         self.phases = {name: cls(self, self.tree) for name, cls in
-                       getPhases(debug).items()}
+                       _phases.items()}
 
     def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
 
@@ -204,6 +191,9 @@ def mainLoop(self):
         DoctypeToken = tokenTypes["Doctype"]
         ParseErrorToken = tokenTypes["ParseError"]
 
+        type_names = {value: key for key, value in tokenTypes.items()}
+        debug = self.debug
+
         for token in self.tokenizer:
             prev_token = None
             new_token = token
@@ -235,6 +225,17 @@ def mainLoop(self):
                     else:
                         phase = self.phases["inForeignContent"]
 
+                    if debug:
+                        info = {"type": type_names[type]}
+                        if type in (StartTagToken, EndTagToken):
+                            info["name"] = new_token['name']
+
+                        self.log.append((self.tokenizer.state.__name__,
+                                         self.phase.__class__.__name__,
+                                         phase.__class__.__name__,
+                                         "process" + info["type"],
+                                         info))
+
                     if type == CharactersToken:
                         new_token = phase.processCharacters(new_token)
                     elif type == SpaceCharactersToken:
@@ -396,2386 +397,2380 @@ def parseRCDataRawtext(self, token, contentType):
         self.phase = self.phases["text"]
 
 
-@_utils.memoize
-def getPhases(debug):
-    def log(function):
-        """Logger that records which phase processes each token"""
-        type_names = {value: key for key, value in tokenTypes.items()}
-
-        def wrapped(self, *args, **kwargs):
-            if function.__name__.startswith("process") and len(args) > 0:
-                token = args[0]
-                info = {"type": type_names[token['type']]}
-                if token['type'] in tagTokenTypes:
-                    info["name"] = token['name']
-
-                self.parser.log.append((self.parser.tokenizer.state.__name__,
-                                        self.parser.phase.__class__.__name__,
-                                        self.__class__.__name__,
-                                        function.__name__,
-                                        info))
-                return function(self, *args, **kwargs)
-            else:
-                return function(self, *args, **kwargs)
-        return wrapped
-
-    def getMetaclass(use_metaclass, metaclass_func):
-        if use_metaclass:
-            return method_decorator_metaclass(metaclass_func)
+class Phase(object):
+    """Base class for helper object that implements each phase of processing
+    """
+    __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
+
+    def __init__(self, parser, tree):
+        self.parser = parser
+        self.tree = tree
+        self.__startTagCache = {}
+        self.__endTagCache = {}
+
+    def processEOF(self):
+        raise NotImplementedError
+
+    def processComment(self, token):
+        # For most phases the following is correct. Where it's not it will be
+        # overridden.
+        self.tree.insertComment(token, self.tree.openElements[-1])
+
+    def processDoctype(self, token):
+        self.parser.parseError("unexpected-doctype")
+
+    def processCharacters(self, token):
+        self.tree.insertText(token["data"])
+
+    def processSpaceCharacters(self, token):
+        self.tree.insertText(token["data"])
+
+    def processStartTag(self, token):
+        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
+        # requires a circular reference to the Phase, and this ends up with a significant
+        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
+        name = token["name"]
+        # In Py2, using `in` is quicker in general than try/except KeyError
+        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
+        if name in self.__startTagCache:
+            func = self.__startTagCache[name]
         else:
-            return type
-
-    # pylint:disable=unused-argument
-    class Phase(with_metaclass(getMetaclass(debug, log))):
-        """Base class for helper object that implements each phase of processing
-        """
-        __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
-
-        def __init__(self, parser, tree):
-            self.parser = parser
-            self.tree = tree
-            self.__startTagCache = {}
-            self.__endTagCache = {}
-
-        def processEOF(self):
-            raise NotImplementedError
-
-        def processComment(self, token):
-            # For most phases the following is correct. Where it's not it will be
-            # overridden.
-            self.tree.insertComment(token, self.tree.openElements[-1])
-
-        def processDoctype(self, token):
-            self.parser.parseError("unexpected-doctype")
-
-        def processCharacters(self, token):
-            self.tree.insertText(token["data"])
-
-        def processSpaceCharacters(self, token):
-            self.tree.insertText(token["data"])
-
-        def processStartTag(self, token):
-            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
-            # requires a circular reference to the Phase, and this ends up with a significant
-            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
-            name = token["name"]
-            # In Py2, using `in` is quicker in general than try/except KeyError
-            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
-            if name in self.__startTagCache:
-                func = self.__startTagCache[name]
-            else:
-                func = self.__startTagCache[name] = self.startTagHandler[name]
-                # bound the cache size in case we get loads of unknown tags
-                while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
-                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
-                    self.__startTagCache.pop(next(iter(self.__startTagCache)))
-            return func(token)
-
-        def startTagHtml(self, token):
-            if not self.parser.firstStartTag and token["name"] == "html":
-                self.parser.parseError("non-html-root")
-            # XXX Need a check here to see if the first start tag token emitted is
-            # this token... If it's not, invoke self.parser.parseError().
-            for attr, value in token["data"].items():
-                if attr not in self.tree.openElements[0].attributes:
-                    self.tree.openElements[0].attributes[attr] = value
-            self.parser.firstStartTag = False
-
-        def processEndTag(self, token):
-            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
-            # requires a circular reference to the Phase, and this ends up with a significant
-            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
-            name = token["name"]
-            # In Py2, using `in` is quicker in general than try/except KeyError
-            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
-            if name in self.__endTagCache:
-                func = self.__endTagCache[name]
-            else:
-                func = self.__endTagCache[name] = self.endTagHandler[name]
-                # bound the cache size in case we get loads of unknown tags
-                while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
-                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
-                    self.__endTagCache.pop(next(iter(self.__endTagCache)))
-            return func(token)
-
-    class InitialPhase(Phase):
-        __slots__ = tuple()
-
-        def processSpaceCharacters(self, token):
-            pass
-
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
-
-        def processDoctype(self, token):
-            name = token["name"]
-            publicId = token["publicId"]
-            systemId = token["systemId"]
-            correct = token["correct"]
-
-            if (name != "html" or publicId is not None or
-                    systemId is not None and systemId != "about:legacy-compat"):
-                self.parser.parseError("unknown-doctype")
-
-            if publicId is None:
-                publicId = ""
-
-            self.tree.insertDoctype(token)
-
-            if publicId != "":
-                publicId = publicId.translate(asciiUpper2Lower)
-
-            if (not correct or token["name"] != "html" or
-                    publicId.startswith(
-                        ("+//silmaril//dtd html pro v0r11 19970101//",
-                         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
-                         "-//as//dtd html 3.0 aswedit + extensions//",
-                         "-//ietf//dtd html 2.0 level 1//",
-                         "-//ietf//dtd html 2.0 level 2//",
-                         "-//ietf//dtd html 2.0 strict level 1//",
-                         "-//ietf//dtd html 2.0 strict level 2//",
-                         "-//ietf//dtd html 2.0 strict//",
-                         "-//ietf//dtd html 2.0//",
-                         "-//ietf//dtd html 2.1e//",
-                         "-//ietf//dtd html 3.0//",
-                         "-//ietf//dtd html 3.2 final//",
-                         "-//ietf//dtd html 3.2//",
-                         "-//ietf//dtd html 3//",
-                         "-//ietf//dtd html level 0//",
-                         "-//ietf//dtd html level 1//",
-                         "-//ietf//dtd html level 2//",
-                         "-//ietf//dtd html level 3//",
-                         "-//ietf//dtd html strict level 0//",
-                         "-//ietf//dtd html strict level 1//",
-                         "-//ietf//dtd html strict level 2//",
-                         "-//ietf//dtd html strict level 3//",
-                         "-//ietf//dtd html strict//",
-                         "-//ietf//dtd html//",
-                         "-//metrius//dtd metrius presentational//",
-                         "-//microsoft//dtd internet explorer 2.0 html strict//",
-                         "-//microsoft//dtd internet explorer 2.0 html//",
-                         "-//microsoft//dtd internet explorer 2.0 tables//",
-                         "-//microsoft//dtd internet explorer 3.0 html strict//",
-                         "-//microsoft//dtd internet explorer 3.0 html//",
-                         "-//microsoft//dtd internet explorer 3.0 tables//",
-                         "-//netscape comm. corp.//dtd html//",
-                         "-//netscape comm. corp.//dtd strict html//",
-                         "-//o'reilly and associates//dtd html 2.0//",
-                         "-//o'reilly and associates//dtd html extended 1.0//",
-                         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
-                         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
-                         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
-                         "-//spyglass//dtd html 2.0 extended//",
-                         "-//sq//dtd html 2.0 hotmetal + extensions//",
-                         "-//sun microsystems corp.//dtd hotjava html//",
-                         "-//sun microsystems corp.//dtd hotjava strict html//",
-                         "-//w3c//dtd html 3 1995-03-24//",
-                         "-//w3c//dtd html 3.2 draft//",
-                         "-//w3c//dtd html 3.2 final//",
-                         "-//w3c//dtd html 3.2//",
-                         "-//w3c//dtd html 3.2s draft//",
-                         "-//w3c//dtd html 4.0 frameset//",
-                         "-//w3c//dtd html 4.0 transitional//",
-                         "-//w3c//dtd html experimental 19960712//",
-                         "-//w3c//dtd html experimental 970421//",
-                         "-//w3c//dtd w3 html//",
-                         "-//w3o//dtd w3 html 3.0//",
-                         "-//webtechs//dtd mozilla html 2.0//",
-                         "-//webtechs//dtd mozilla html//")) or
-                    publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
-                                 "-/w3c/dtd html 4.0 transitional/en",
-                                 "html") or
-                    publicId.startswith(
-                        ("-//w3c//dtd html 4.01 frameset//",
-                         "-//w3c//dtd html 4.01 transitional//")) and
-                    systemId is None or
-                    systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
-                self.parser.compatMode = "quirks"
-            elif (publicId.startswith(
-                    ("-//w3c//dtd xhtml 1.0 frameset//",
-                     "-//w3c//dtd xhtml 1.0 transitional//")) or
-                  publicId.startswith(
-                      ("-//w3c//dtd html 4.01 frameset//",
-                       "-//w3c//dtd html 4.01 transitional//")) and
-                  systemId is not None):
-                self.parser.compatMode = "limited quirks"
-
-            self.parser.phase = self.parser.phases["beforeHtml"]
-
-        def anythingElse(self):
+            func = self.__startTagCache[name] = self.startTagHandler[name]
+            # bound the cache size in case we get loads of unknown tags
+            while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
+                # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
+                self.__startTagCache.pop(next(iter(self.__startTagCache)))
+        return func(token)
+
+    def startTagHtml(self, token):
+        if not self.parser.firstStartTag and token["name"] == "html":
+            self.parser.parseError("non-html-root")
+        # XXX Need a check here to see if the first start tag token emitted is
+        # this token... If it's not, invoke self.parser.parseError().
+        for attr, value in token["data"].items():
+            if attr not in self.tree.openElements[0].attributes:
+                self.tree.openElements[0].attributes[attr] = value
+        self.parser.firstStartTag = False
+
+    def processEndTag(self, token):
+        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
+        # requires a circular reference to the Phase, and this ends up with a significant
+        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
+        name = token["name"]
+        # In Py2, using `in` is quicker in general than try/except KeyError
+        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
+        if name in self.__endTagCache:
+            func = self.__endTagCache[name]
+        else:
+            func = self.__endTagCache[name] = self.endTagHandler[name]
+            # bound the cache size in case we get loads of unknown tags
+            while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
+                # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
+                self.__endTagCache.pop(next(iter(self.__endTagCache)))
+        return func(token)
+
+
+class InitialPhase(Phase):
+    __slots__ = tuple()
+
+    def processSpaceCharacters(self, token):
+        pass
+
+    def processComment(self, token):
+        self.tree.insertComment(token, self.tree.document)
+
+    def processDoctype(self, token):
+        name = token["name"]
+        publicId = token["publicId"]
+        systemId = token["systemId"]
+        correct = token["correct"]
+
+        if (name != "html" or publicId is not None or
+                systemId is not None and systemId != "about:legacy-compat"):
+            self.parser.parseError("unknown-doctype")
+
+        if publicId is None:
+            publicId = ""
+
+        self.tree.insertDoctype(token)
+
+        if publicId != "":
+            publicId = publicId.translate(asciiUpper2Lower)
+
+        if (not correct or token["name"] != "html" or
+                publicId.startswith(
+                    ("+//silmaril//dtd html pro v0r11 19970101//",
+                     "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+                     "-//as//dtd html 3.0 aswedit + extensions//",
+                     "-//ietf//dtd html 2.0 level 1//",
+                     "-//ietf//dtd html 2.0 level 2//",
+                     "-//ietf//dtd html 2.0 strict level 1//",
+                     "-//ietf//dtd html 2.0 strict level 2//",
+                     "-//ietf//dtd html 2.0 strict//",
+                     "-//ietf//dtd html 2.0//",
+                     "-//ietf//dtd html 2.1e//",
+                     "-//ietf//dtd html 3.0//",
+                     "-//ietf//dtd html 3.2 final//",
+                     "-//ietf//dtd html 3.2//",
+                     "-//ietf//dtd html 3//",
+                     "-//ietf//dtd html level 0//",
+                     "-//ietf//dtd html level 1//",
+                     "-//ietf//dtd html level 2//",
+                     "-//ietf//dtd html level 3//",
+                     "-//ietf//dtd html strict level 0//",
+                     "-//ietf//dtd html strict level 1//",
+                     "-//ietf//dtd html strict level 2//",
+                     "-//ietf//dtd html strict level 3//",
+                     "-//ietf//dtd html strict//",
+                     "-//ietf//dtd html//",
+                     "-//metrius//dtd metrius presentational//",
+                     "-//microsoft//dtd internet explorer 2.0 html strict//",
+                     "-//microsoft//dtd internet explorer 2.0 html//",
+                     "-//microsoft//dtd internet explorer 2.0 tables//",
+                     "-//microsoft//dtd internet explorer 3.0 html strict//",
+                     "-//microsoft//dtd internet explorer 3.0 html//",
+                     "-//microsoft//dtd internet explorer 3.0 tables//",
+                     "-//netscape comm. corp.//dtd html//",
+                     "-//netscape comm. corp.//dtd strict html//",
+                     "-//o'reilly and associates//dtd html 2.0//",
+                     "-//o'reilly and associates//dtd html extended 1.0//",
+                     "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+                     "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+                     "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+                     "-//spyglass//dtd html 2.0 extended//",
+                     "-//sq//dtd html 2.0 hotmetal + extensions//",
+                     "-//sun microsystems corp.//dtd hotjava html//",
+                     "-//sun microsystems corp.//dtd hotjava strict html//",
+                     "-//w3c//dtd html 3 1995-03-24//",
+                     "-//w3c//dtd html 3.2 draft//",
+                     "-//w3c//dtd html 3.2 final//",
+                     "-//w3c//dtd html 3.2//",
+                     "-//w3c//dtd html 3.2s draft//",
+                     "-//w3c//dtd html 4.0 frameset//",
+                     "-//w3c//dtd html 4.0 transitional//",
+                     "-//w3c//dtd html experimental 19960712//",
+                     "-//w3c//dtd html experimental 970421//",
+                     "-//w3c//dtd w3 html//",
+                     "-//w3o//dtd w3 html 3.0//",
+                     "-//webtechs//dtd mozilla html 2.0//",
+                     "-//webtechs//dtd mozilla html//")) or
+                publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
+                             "-/w3c/dtd html 4.0 transitional/en",
+                             "html") or
+                publicId.startswith(
+                    ("-//w3c//dtd html 4.01 frameset//",
+                     "-//w3c//dtd html 4.01 transitional//")) and
+                systemId is None or
+                systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
             self.parser.compatMode = "quirks"
-            self.parser.phase = self.parser.phases["beforeHtml"]
-
-        def processCharacters(self, token):
-            self.parser.parseError("expected-doctype-but-got-chars")
-            self.anythingElse()
-            return token
-
-        def processStartTag(self, token):
-            self.parser.parseError("expected-doctype-but-got-start-tag",
+        elif (publicId.startswith(
+                ("-//w3c//dtd xhtml 1.0 frameset//",
+                 "-//w3c//dtd xhtml 1.0 transitional//")) or
+              publicId.startswith(
+                  ("-//w3c//dtd html 4.01 frameset//",
+                   "-//w3c//dtd html 4.01 transitional//")) and
+              systemId is not None):
+            self.parser.compatMode = "limited quirks"
+
+        self.parser.phase = self.parser.phases["beforeHtml"]
+
+    def anythingElse(self):
+        self.parser.compatMode = "quirks"
+        self.parser.phase = self.parser.phases["beforeHtml"]
+
+    def processCharacters(self, token):
+        self.parser.parseError("expected-doctype-but-got-chars")
+        self.anythingElse()
+        return token
+
+    def processStartTag(self, token):
+        self.parser.parseError("expected-doctype-but-got-start-tag",
+                               {"name": token["name"]})
+        self.anythingElse()
+        return token
+
+    def processEndTag(self, token):
+        self.parser.parseError("expected-doctype-but-got-end-tag",
+                               {"name": token["name"]})
+        self.anythingElse()
+        return token
+
+    def processEOF(self):
+        self.parser.parseError("expected-doctype-but-got-eof")
+        self.anythingElse()
+        return True
+
+
+class BeforeHtmlPhase(Phase):
+    __slots__ = tuple()
+
+    # helper methods
+    def insertHtmlElement(self):
+        self.tree.insertRoot(impliedTagToken("html", "StartTag"))
+        self.parser.phase = self.parser.phases["beforeHead"]
+
+    # other
+    def processEOF(self):
+        self.insertHtmlElement()
+        return True
+
+    def processComment(self, token):
+        self.tree.insertComment(token, self.tree.document)
+
+    def processSpaceCharacters(self, token):
+        pass
+
+    def processCharacters(self, token):
+        self.insertHtmlElement()
+        return token
+
+    def processStartTag(self, token):
+        if token["name"] == "html":
+            self.parser.firstStartTag = True
+        self.insertHtmlElement()
+        return token
+
+    def processEndTag(self, token):
+        if token["name"] not in ("head", "body", "html", "br"):
+            self.parser.parseError("unexpected-end-tag-before-html",
                                    {"name": token["name"]})
-            self.anythingElse()
+        else:
+            self.insertHtmlElement()
             return token
 
-        def processEndTag(self, token):
-            self.parser.parseError("expected-doctype-but-got-end-tag",
-                                   {"name": token["name"]})
-            self.anythingElse()
-            return token
 
-        def processEOF(self):
-            self.parser.parseError("expected-doctype-but-got-eof")
-            self.anythingElse()
-            return True
+class BeforeHeadPhase(Phase):
+    __slots__ = tuple()
 
-    class BeforeHtmlPhase(Phase):
-        __slots__ = tuple()
+    def processEOF(self):
+        self.startTagHead(impliedTagToken("head", "StartTag"))
+        return True
 
-        # helper methods
-        def insertHtmlElement(self):
-            self.tree.insertRoot(impliedTagToken("html", "StartTag"))
-            self.parser.phase = self.parser.phases["beforeHead"]
+    def processSpaceCharacters(self, token):
+        pass
 
-        # other
-        def processEOF(self):
-            self.insertHtmlElement()
-            return True
+    def processCharacters(self, token):
+        self.startTagHead(impliedTagToken("head", "StartTag"))
+        return token
 
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
+    def startTagHtml(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
 
-        def processSpaceCharacters(self, token):
-            pass
+    def startTagHead(self, token):
+        self.tree.insertElement(token)
+        self.tree.headPointer = self.tree.openElements[-1]
+        self.parser.phase = self.parser.phases["inHead"]
 
-        def processCharacters(self, token):
-            self.insertHtmlElement()
-            return token
+    def startTagOther(self, token):
+        self.startTagHead(impliedTagToken("head", "StartTag"))
+        return token
 
-        def processStartTag(self, token):
-            if token["name"] == "html":
-                self.parser.firstStartTag = True
-            self.insertHtmlElement()
-            return token
+    def endTagImplyHead(self, token):
+        self.startTagHead(impliedTagToken("head", "StartTag"))
+        return token
 
-        def processEndTag(self, token):
-            if token["name"] not in ("head", "body", "html", "br"):
-                self.parser.parseError("unexpected-end-tag-before-html",
-                                       {"name": token["name"]})
-            else:
-                self.insertHtmlElement()
-                return token
+    def endTagOther(self, token):
+        self.parser.parseError("end-tag-after-implied-root",
+                               {"name": token["name"]})
 
-    class BeforeHeadPhase(Phase):
-        __slots__ = tuple()
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", startTagHtml),
+        ("head", startTagHead)
+    ])
+    startTagHandler.default = startTagOther
 
-        def processEOF(self):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return True
+    endTagHandler = _utils.MethodDispatcher([
+        (("head", "body", "html", "br"), endTagImplyHead)
+    ])
+    endTagHandler.default = endTagOther
 
-        def processSpaceCharacters(self, token):
-            pass
 
-        def processCharacters(self, token):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return token
+class InHeadPhase(Phase):
+    __slots__ = tuple()
 
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    # the real thing
+    def processEOF(self):
+        self.anythingElse()
+        return True
 
-        def startTagHead(self, token):
-            self.tree.insertElement(token)
-            self.tree.headPointer = self.tree.openElements[-1]
-            self.parser.phase = self.parser.phases["inHead"]
+    def processCharacters(self, token):
+        self.anythingElse()
+        return token
 
-        def startTagOther(self, token):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return token
+    def startTagHtml(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
 
-        def endTagImplyHead(self, token):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return token
+    def startTagHead(self, token):
+        self.parser.parseError("two-heads-are-not-better-than-one")
 
-        def endTagOther(self, token):
-            self.parser.parseError("end-tag-after-implied-root",
-                                   {"name": token["name"]})
+    def startTagBaseLinkCommand(self, token):
+        self.tree.insertElement(token)
+        self.tree.openElements.pop()
+        token["selfClosingAcknowledged"] = True
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("head", startTagHead)
-        ])
-        startTagHandler.default = startTagOther
+    def startTagMeta(self, token):
+        self.tree.insertElement(token)
+        self.tree.openElements.pop()
+        token["selfClosingAcknowledged"] = True
+
+        attributes = token["data"]
+        if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
+            if "charset" in attributes:
+                self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
+            elif ("content" in attributes and
+                  "http-equiv" in attributes and
+                  attributes["http-equiv"].lower() == "content-type"):
+                # Encoding it as UTF-8 here is a hack, as really we should pass
+                # the abstract Unicode string, and just use the
+                # ContentAttrParser on that, but using UTF-8 allows all chars
+                # to be encoded and as a ASCII-superset works.
+                data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
+                parser = _inputstream.ContentAttrParser(data)
+                codec = parser.parse()
+                self.parser.tokenizer.stream.changeEncoding(codec)
+
+    def startTagTitle(self, token):
+        self.parser.parseRCDataRawtext(token, "RCDATA")
+
+    def startTagNoFramesStyle(self, token):
+        # Need to decide whether to implement the scripting-disabled case
+        self.parser.parseRCDataRawtext(token, "RAWTEXT")
+
+    def startTagNoscript(self, token):
+        if self.parser.scripting:
+            self.parser.parseRCDataRawtext(token, "RAWTEXT")
+        else:
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inHeadNoscript"]
 
-        endTagHandler = _utils.MethodDispatcher([
-            (("head", "body", "html", "br"), endTagImplyHead)
-        ])
-        endTagHandler.default = endTagOther
+    def startTagScript(self, token):
+        self.tree.insertElement(token)
+        self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
+        self.parser.originalPhase = self.parser.phase
+        self.parser.phase = self.parser.phases["text"]
+
+    def startTagOther(self, token):
+        self.anythingElse()
+        return token
+
+    def endTagHead(self, token):
+        node = self.parser.tree.openElements.pop()
+        assert node.name == "head", "Expected head got %s" % node.name
+        self.parser.phase = self.parser.phases["afterHead"]
+
+    def endTagHtmlBodyBr(self, token):
+        self.anythingElse()
+        return token
+
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+    def anythingElse(self):
+        self.endTagHead(impliedTagToken("head"))
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", startTagHtml),
+        ("title", startTagTitle),
+        (("noframes", "style"), startTagNoFramesStyle),
+        ("noscript", startTagNoscript),
+        ("script", startTagScript),
+        (("base", "basefont", "bgsound", "command", "link"),
+         startTagBaseLinkCommand),
+        ("meta", startTagMeta),
+        ("head", startTagHead)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        ("head", endTagHead),
+        (("br", "html", "body"), endTagHtmlBodyBr)
+    ])
+    endTagHandler.default = endTagOther
+
+
+class InHeadNoscriptPhase(Phase):
+    __slots__ = tuple()
+
+    def processEOF(self):
+        self.parser.parseError("eof-in-head-noscript")
+        self.anythingElse()
+        return True
+
+    def processComment(self, token):
+        return self.parser.phases["inHead"].processComment(token)
+
+    def processCharacters(self, token):
+        self.parser.parseError("char-in-head-noscript")
+        self.anythingElse()
+        return token
+
+    def processSpaceCharacters(self, token):
+        return self.parser.phases["inHead"].processSpaceCharacters(token)
+
+    def startTagHtml(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
+
+    def startTagBaseLinkCommand(self, token):
+        return self.parser.phases["inHead"].processStartTag(token)
+
+    def startTagHeadNoscript(self, token):
+        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+
+    def startTagOther(self, token):
+        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+        self.anythingElse()
+        return token
+
+    def endTagNoscript(self, token):
+        node = self.parser.tree.openElements.pop()
+        assert node.name == "noscript", "Expected noscript got %s" % node.name
+        self.parser.phase = self.parser.phases["inHead"]
+
+    def endTagBr(self, token):
+        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+        self.anythingElse()
+        return token
+
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+    def anythingElse(self):
+        # Caller must raise parse error first!
+        self.endTagNoscript(impliedTagToken("noscript"))
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", startTagHtml),
+        (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
+        (("head", "noscript"), startTagHeadNoscript),
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        ("noscript", endTagNoscript),
+        ("br", endTagBr),
+    ])
+    endTagHandler.default = endTagOther
+
+
+class AfterHeadPhase(Phase):
+    __slots__ = tuple()
+
+    def processEOF(self):
+        self.anythingElse()
+        return True
+
+    def processCharacters(self, token):
+        self.anythingElse()
+        return token
+
+    def startTagHtml(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
+
+    def startTagBody(self, token):
+        self.parser.framesetOK = False
+        self.tree.insertElement(token)
+        self.parser.phase = self.parser.phases["inBody"]
 
-    class InHeadPhase(Phase):
-        __slots__ = tuple()
+    def startTagFrameset(self, token):
+        self.tree.insertElement(token)
+        self.parser.phase = self.parser.phases["inFrameset"]
 
-        # the real thing
-        def processEOF(self):
-            self.anythingElse()
-            return True
+    def startTagFromHead(self, token):
+        self.parser.parseError("unexpected-start-tag-out-of-my-head",
+                               {"name": token["name"]})
+        self.tree.openElements.append(self.tree.headPointer)
+        self.parser.phases["inHead"].processStartTag(token)
+        for node in self.tree.openElements[::-1]:
+            if node.name == "head":
+                self.tree.openElements.remove(node)
+                break
 
-        def processCharacters(self, token):
-            self.anythingElse()
-            return token
+    def startTagHead(self, token):
+        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+
+    def startTagOther(self, token):
+        self.anythingElse()
+        return token
+
+    def endTagHtmlBodyBr(self, token):
+        self.anythingElse()
+        return token
+
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+    def anythingElse(self):
+        self.tree.insertElement(impliedTagToken("body", "StartTag"))
+        self.parser.phase = self.parser.phases["inBody"]
+        self.parser.framesetOK = True
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", startTagHtml),
+        ("body", startTagBody),
+        ("frameset", startTagFrameset),
+        (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
+          "style", "title"),
+         startTagFromHead),
+        ("head", startTagHead)
+    ])
+    startTagHandler.default = startTagOther
+    endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
+                                              endTagHtmlBodyBr)])
+    endTagHandler.default = endTagOther
+
+
+class InBodyPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
+    # the really-really-really-very crazy mode
+    __slots__ = ("processSpaceCharacters",)
+
+    def __init__(self, *args, **kwargs):
+        super(InBodyPhase, self).__init__(*args, **kwargs)
+        # Set this to the default handler
+        self.processSpaceCharacters = self.processSpaceCharactersNonPre
+
+    def isMatchingFormattingElement(self, node1, node2):
+        return (node1.name == node2.name and
+                node1.namespace == node2.namespace and
+                node1.attributes == node2.attributes)
+
+    # helper
+    def addFormattingElement(self, token):
+        self.tree.insertElement(token)
+        element = self.tree.openElements[-1]
 
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+        matchingElements = []
+        for node in self.tree.activeFormattingElements[::-1]:
+            if node is Marker:
+                break
+            elif self.isMatchingFormattingElement(node, element):
+                matchingElements.append(node)
+
+        assert len(matchingElements) <= 3
+        if len(matchingElements) == 3:
+            self.tree.activeFormattingElements.remove(matchingElements[-1])
+        self.tree.activeFormattingElements.append(element)
+
+    # the real deal
+    def processEOF(self):
+        allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
+                                      "tfoot", "th", "thead", "tr", "body",
+                                      "html"))
+        for node in self.tree.openElements[::-1]:
+            if node.name not in allowed_elements:
+                self.parser.parseError("expected-closing-tag-but-got-eof")
+                break
+        # Stop parsing
+
+    def processSpaceCharactersDropNewline(self, token):
+        # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
+        # want to drop leading newlines
+        data = token["data"]
+        self.processSpaceCharacters = self.processSpaceCharactersNonPre
+        if (data.startswith("\n") and
+            self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
+                not self.tree.openElements[-1].hasContent()):
+            data = data[1:]
+        if data:
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertText(data)
+
+    def processCharacters(self, token):
+        if token["data"] == "\u0000":
+            # The tokenizer should always emit null on its own
+            return
+        self.tree.reconstructActiveFormattingElements()
+        self.tree.insertText(token["data"])
+        # This must be bad for performance
+        if (self.parser.framesetOK and
+            any(char not in spaceCharacters
+                for char in token["data"])):
+            self.parser.framesetOK = False
 
-        def startTagHead(self, token):
-            self.parser.parseError("two-heads-are-not-better-than-one")
+    def processSpaceCharactersNonPre(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.tree.insertText(token["data"])
 
-        def startTagBaseLinkCommand(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
+    def startTagProcessInHead(self, token):
+        return self.parser.phases["inHead"].processStartTag(token)
 
-        def startTagMeta(self, token):
+    def startTagBody(self, token):
+        self.parser.parseError("unexpected-start-tag", {"name": "body"})
+        if (len(self.tree.openElements) == 1 or
+                self.tree.openElements[1].name != "body"):
+            assert self.parser.innerHTML
+        else:
+            self.parser.framesetOK = False
+            for attr, value in token["data"].items():
+                if attr not in self.tree.openElements[1].attributes:
+                    self.tree.openElements[1].attributes[attr] = value
+
+    def startTagFrameset(self, token):
+        self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
+        if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"):
+            assert self.parser.innerHTML
+        elif not self.parser.framesetOK:
+            pass
+        else:
+            if self.tree.openElements[1].parent:
+                self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
+            while self.tree.openElements[-1].name != "html":
+                self.tree.openElements.pop()
             self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
+            self.parser.phase = self.parser.phases["inFrameset"]
 
-            attributes = token["data"]
-            if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
-                if "charset" in attributes:
-                    self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
-                elif ("content" in attributes and
-                      "http-equiv" in attributes and
-                      attributes["http-equiv"].lower() == "content-type"):
-                    # Encoding it as UTF-8 here is a hack, as really we should pass
-                    # the abstract Unicode string, and just use the
-                    # ContentAttrParser on that, but using UTF-8 allows all chars
-                    # to be encoded and as a ASCII-superset works.
-                    data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
-                    parser = _inputstream.ContentAttrParser(data)
-                    codec = parser.parse()
-                    self.parser.tokenizer.stream.changeEncoding(codec)
-
-        def startTagTitle(self, token):
-            self.parser.parseRCDataRawtext(token, "RCDATA")
-
-        def startTagNoFramesStyle(self, token):
-            # Need to decide whether to implement the scripting-disabled case
-            self.parser.parseRCDataRawtext(token, "RAWTEXT")
+    def startTagCloseP(self, token):
+        if self.tree.elementInScope("p", variant="button"):
+            self.endTagP(impliedTagToken("p"))
+        self.tree.insertElement(token)
 
-        def startTagNoscript(self, token):
-            if self.parser.scripting:
-                self.parser.parseRCDataRawtext(token, "RAWTEXT")
-            else:
-                self.tree.insertElement(token)
-                self.parser.phase = self.parser.phases["inHeadNoscript"]
+    def startTagPreListing(self, token):
+        if self.tree.elementInScope("p", variant="button"):
+            self.endTagP(impliedTagToken("p"))
+        self.tree.insertElement(token)
+        self.parser.framesetOK = False
+        self.processSpaceCharacters = self.processSpaceCharactersDropNewline
 
-        def startTagScript(self, token):
+    def startTagForm(self, token):
+        if self.tree.formPointer:
+            self.parser.parseError("unexpected-start-tag", {"name": "form"})
+        else:
+            if self.tree.elementInScope("p", variant="button"):
+                self.endTagP(impliedTagToken("p"))
             self.tree.insertElement(token)
-            self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
-            self.parser.originalPhase = self.parser.phase
-            self.parser.phase = self.parser.phases["text"]
+            self.tree.formPointer = self.tree.openElements[-1]
 
-        def startTagOther(self, token):
-            self.anythingElse()
-            return token
+    def startTagListItem(self, token):
+        self.parser.framesetOK = False
 
-        def endTagHead(self, token):
-            node = self.parser.tree.openElements.pop()
-            assert node.name == "head", "Expected head got %s" % node.name
-            self.parser.phase = self.parser.phases["afterHead"]
+        stopNamesMap = {"li": ["li"],
+                        "dt": ["dt", "dd"],
+                        "dd": ["dt", "dd"]}
+        stopNames = stopNamesMap[token["name"]]
+        for node in reversed(self.tree.openElements):
+            if node.name in stopNames:
+                self.parser.phase.processEndTag(
+                    impliedTagToken(node.name, "EndTag"))
+                break
+            if (node.nameTuple in specialElements and
+                    node.name not in ("address", "div", "p")):
+                break
 
-        def endTagHtmlBodyBr(self, token):
-            self.anythingElse()
-            return token
+        if self.tree.elementInScope("p", variant="button"):
+            self.parser.phase.processEndTag(
+                impliedTagToken("p", "EndTag"))
 
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+        self.tree.insertElement(token)
 
-        def anythingElse(self):
-            self.endTagHead(impliedTagToken("head"))
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("title", startTagTitle),
-            (("noframes", "style"), startTagNoFramesStyle),
-            ("noscript", startTagNoscript),
-            ("script", startTagScript),
-            (("base", "basefont", "bgsound", "command", "link"),
-             startTagBaseLinkCommand),
-            ("meta", startTagMeta),
-            ("head", startTagHead)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("head", endTagHead),
-            (("br", "html", "body"), endTagHtmlBodyBr)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InHeadNoscriptPhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            self.parser.parseError("eof-in-head-noscript")
-            self.anythingElse()
-            return True
-
-        def processComment(self, token):
-            return self.parser.phases["inHead"].processComment(token)
-
-        def processCharacters(self, token):
-            self.parser.parseError("char-in-head-noscript")
-            self.anythingElse()
-            return token
+    def startTagPlaintext(self, token):
+        if self.tree.elementInScope("p", variant="button"):
+            self.endTagP(impliedTagToken("p"))
+        self.tree.insertElement(token)
+        self.parser.tokenizer.state = self.parser.tokenizer.plaintextState
 
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inHead"].processSpaceCharacters(token)
+    def startTagHeading(self, token):
+        if self.tree.elementInScope("p", variant="button"):
+            self.endTagP(impliedTagToken("p"))
+        if self.tree.openElements[-1].name in headingElements:
+            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+            self.tree.openElements.pop()
+        self.tree.insertElement(token)
 
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    def startTagA(self, token):
+        afeAElement = self.tree.elementInActiveFormattingElements("a")
+        if afeAElement:
+            self.parser.parseError("unexpected-start-tag-implies-end-tag",
+                                   {"startName": "a", "endName": "a"})
+            self.endTagFormatting(impliedTagToken("a"))
+            if afeAElement in self.tree.openElements:
+                self.tree.openElements.remove(afeAElement)
+            if afeAElement in self.tree.activeFormattingElements:
+                self.tree.activeFormattingElements.remove(afeAElement)
+        self.tree.reconstructActiveFormattingElements()
+        self.addFormattingElement(token)
+
+    def startTagFormatting(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.addFormattingElement(token)
+
+    def startTagNobr(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        if self.tree.elementInScope("nobr"):
+            self.parser.parseError("unexpected-start-tag-implies-end-tag",
+                                   {"startName": "nobr", "endName": "nobr"})
+            self.processEndTag(impliedTagToken("nobr"))
+            # XXX Need tests that trigger the following
+            self.tree.reconstructActiveFormattingElements()
+        self.addFormattingElement(token)
 
-        def startTagBaseLinkCommand(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
+    def startTagButton(self, token):
+        if self.tree.elementInScope("button"):
+            self.parser.parseError("unexpected-start-tag-implies-end-tag",
+                                   {"startName": "button", "endName": "button"})
+            self.processEndTag(impliedTagToken("button"))
+            return token
+        else:
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertElement(token)
+            self.parser.framesetOK = False
 
-        def startTagHeadNoscript(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+    def startTagAppletMarqueeObject(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.tree.insertElement(token)
+        self.tree.activeFormattingElements.append(Marker)
+        self.parser.framesetOK = False
+
+    def startTagXmp(self, token):
+        if self.tree.elementInScope("p", variant="button"):
+            self.endTagP(impliedTagToken("p"))
+        self.tree.reconstructActiveFormattingElements()
+        self.parser.framesetOK = False
+        self.parser.parseRCDataRawtext(token, "RAWTEXT")
+
+    def startTagTable(self, token):
+        if self.parser.compatMode != "quirks":
+            if self.tree.elementInScope("p", variant="button"):
+                self.processEndTag(impliedTagToken("p"))
+        self.tree.insertElement(token)
+        self.parser.framesetOK = False
+        self.parser.phase = self.parser.phases["inTable"]
 
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
-            self.anythingElse()
-            return token
+    def startTagVoidFormatting(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.tree.insertElement(token)
+        self.tree.openElements.pop()
+        token["selfClosingAcknowledged"] = True
+        self.parser.framesetOK = False
+
+    def startTagInput(self, token):
+        framesetOK = self.parser.framesetOK
+        self.startTagVoidFormatting(token)
+        if ("type" in token["data"] and
+                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+            # input type=hidden doesn't change framesetOK
+            self.parser.framesetOK = framesetOK
+
+    def startTagParamSource(self, token):
+        self.tree.insertElement(token)
+        self.tree.openElements.pop()
+        token["selfClosingAcknowledged"] = True
 
-        def endTagNoscript(self, token):
-            node = self.parser.tree.openElements.pop()
-            assert node.name == "noscript", "Expected noscript got %s" % node.name
-            self.parser.phase = self.parser.phases["inHead"]
+    def startTagHr(self, token):
+        if self.tree.elementInScope("p", variant="button"):
+            self.endTagP(impliedTagToken("p"))
+        self.tree.insertElement(token)
+        self.tree.openElements.pop()
+        token["selfClosingAcknowledged"] = True
+        self.parser.framesetOK = False
+
+    def startTagImage(self, token):
+        # No really...
+        self.parser.parseError("unexpected-start-tag-treated-as",
+                               {"originalName": "image", "newName": "img"})
+        self.processStartTag(impliedTagToken("img", "StartTag",
+                                             attributes=token["data"],
+                                             selfClosing=token["selfClosing"]))
+
+    def startTagIsIndex(self, token):
+        self.parser.parseError("deprecated-tag", {"name": "isindex"})
+        if self.tree.formPointer:  # a form pointer is already set: drop the token
+            return
+        form_attrs = {}  # only "action" is forwarded to the implied <form>
+        if "action" in token["data"]:
+            form_attrs["action"] = token["data"]["action"]
+        self.processStartTag(impliedTagToken("form", "StartTag",
+                                             attributes=form_attrs))
+        self.processStartTag(impliedTagToken("hr", "StartTag"))
+        self.processStartTag(impliedTagToken("label", "StartTag"))
+        # XXX Localization: the fallback prompt below is hard-coded English text.
+        if "prompt" in token["data"]:
+            prompt = token["data"]["prompt"]
+        else:
+            prompt = "This is a searchable index. Enter search keywords: "
+        self.processCharacters(
+            {"type": tokenTypes["Characters"], "data": prompt})
+        attributes = token["data"].copy()  # copy: the token's own dict stays intact
+        if "action" in attributes:
+            del attributes["action"]
+        if "prompt" in attributes:
+            del attributes["prompt"]
+        attributes["name"] = "isindex"  # replaces any "name" supplied on the tag
+        self.processStartTag(impliedTagToken("input", "StartTag",
+                                             attributes=attributes,
+                                             selfClosing=token["selfClosing"]))
+        self.processEndTag(impliedTagToken("label"))
+        self.processStartTag(impliedTagToken("hr", "StartTag"))
+        self.processEndTag(impliedTagToken("form"))
+
+    def startTagTextarea(self, token):
+        self.tree.insertElement(token)
+        self.parser.tokenizer.state = self.parser.tokenizer.rcdataState  # tokenize body as RCDATA
+        self.processSpaceCharacters = self.processSpaceCharactersDropNewline  # swap space handler
+        self.parser.framesetOK = False
 
-        def endTagBr(self, token):
-            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
-            self.anythingElse()
-            return token
+    def startTagIFrame(self, token):
+        self.parser.framesetOK = False
+        self.startTagRawtext(token)  # delegates to the shared RAWTEXT handler
 
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+    def startTagNoscript(self, token):
+        if self.parser.scripting:  # scripting flag set: handle contents as RAWTEXT
+            self.startTagRawtext(token)
+        else:  # scripting flag unset: handle like any other start tag
+            self.startTagOther(token)
 
-        def anythingElse(self):
-            # Caller must raise parse error first!
-            self.endTagNoscript(impliedTagToken("noscript"))
+    def startTagRawtext(self, token):
+        """iframe, noembed, noframes, noscript (if scripting enabled)"""
+        self.parser.parseRCDataRawtext(token, "RAWTEXT")
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
-            (("head", "noscript"), startTagHeadNoscript),
-        ])
-        startTagHandler.default = startTagOther
+    def startTagOpt(self, token):
+        if self.tree.openElements[-1].name == "option":  # current node is <option>: close it
+            self.parser.phase.processEndTag(impliedTagToken("option"))
+        self.tree.reconstructActiveFormattingElements()
+        self.parser.tree.insertElement(token)
 
-        endTagHandler = _utils.MethodDispatcher([
-            ("noscript", endTagNoscript),
-            ("br", endTagBr),
-        ])
-        endTagHandler.default = endTagOther
+    def startTagSelect(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.tree.insertElement(token)
+        self.parser.framesetOK = False
+        if self.parser.phase in (self.parser.phases["inTable"],
+                                 self.parser.phases["inCaption"],
+                                 self.parser.phases["inColumnGroup"],
+                                 self.parser.phases["inTableBody"],
+                                 self.parser.phases["inRow"],
+                                 self.parser.phases["inCell"]):
+            self.parser.phase = self.parser.phases["inSelectInTable"]  # was in a table phase
+        else:  # any other current phase
+            self.parser.phase = self.parser.phases["inSelect"]
 
-    class AfterHeadPhase(Phase):
-        __slots__ = tuple()
+    def startTagRpRt(self, token):
+        if self.tree.elementInScope("ruby"):
+            self.tree.generateImpliedEndTags()
+            if self.tree.openElements[-1].name != "ruby":
+                self.parser.parseError()  # called without an explicit error code
+        self.tree.insertElement(token)  # inserted whether or not a <ruby> is in scope
 
-        def processEOF(self):
-            self.anythingElse()
-            return True
+    def startTagMath(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.parser.adjustMathMLAttributes(token)
+        self.parser.adjustForeignAttributes(token)
+        token["namespace"] = namespaces["mathml"]  # set on the token before insertion
+        self.tree.insertElement(token)
+        # Need to get the parse error right for the case where the token
+        # has a namespace not equal to the xmlns attribute
+        if token["selfClosing"]:  # <math/>: pop it straight away and acknowledge the flag
+            self.tree.openElements.pop()
+            token["selfClosingAcknowledged"] = True
 
-        def processCharacters(self, token):
-            self.anythingElse()
-            return token
+    def startTagSvg(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.parser.adjustSVGAttributes(token)
+        self.parser.adjustForeignAttributes(token)
+        token["namespace"] = namespaces["svg"]  # set on the token before insertion
+        self.tree.insertElement(token)
+        # Need to get the parse error right for the case where the token
+        # has a namespace not equal to the xmlns attribute
+        if token["selfClosing"]:  # <svg/>: pop it straight away and acknowledge the flag
+            self.tree.openElements.pop()
+            token["selfClosingAcknowledged"] = True
 
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    def startTagMisplaced(self, token):
+        """Ignore a start tag that belongs to a different insertion mode.
+        Only a parse error is reported; nothing is inserted into the tree.
+        The dispatch table of this phase routes "caption", "col", "colgroup",
+        "frame", "head", "tbody", "td", "tfoot", "th", "thead" and "tr" here;
+        "frameset", "option", "optgroup" and "noscript" have their own handlers.
+        """
+        self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})
 
-        def startTagBody(self, token):
-            self.parser.framesetOK = False
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inBody"]
+    def startTagOther(self, token):
+        self.tree.reconstructActiveFormattingElements()
+        self.tree.insertElement(token)
 
-        def startTagFrameset(self, token):
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inFrameset"]
+    def endTagP(self, token):
+        if not self.tree.elementInScope("p", variant="button"):
+            self.startTagCloseP(impliedTagToken("p", "StartTag"))  # process an implied <p> first
+            self.parser.parseError("unexpected-end-tag", {"name": "p"})
+            self.endTagP(impliedTagToken("p", "EndTag"))  # then recurse with an implied </p>
+        else:
+            self.tree.generateImpliedEndTags("p")
+            if self.tree.openElements[-1].name != "p":
+                self.parser.parseError("unexpected-end-tag", {"name": "p"})
+            node = self.tree.openElements.pop()
+            while node.name != "p":  # keep popping until the <p> itself has been popped
+                node = self.tree.openElements.pop()
 
-        def startTagFromHead(self, token):
-            self.parser.parseError("unexpected-start-tag-out-of-my-head",
-                                   {"name": token["name"]})
-            self.tree.openElements.append(self.tree.headPointer)
-            self.parser.phases["inHead"].processStartTag(token)
-            for node in self.tree.openElements[::-1]:
-                if node.name == "head":
-                    self.tree.openElements.remove(node)
+    def endTagBody(self, token):
+        if not self.tree.elementInScope("body"):
+            self.parser.parseError()
+            return
+        elif self.tree.openElements[-1].name != "body":
+            for node in self.tree.openElements[2:]:
+                if node.name not in frozenset(("dd", "dt", "li", "optgroup",
+                                               "option", "p", "rp", "rt",
+                                               "tbody", "td", "tfoot",
+                                               "th", "thead", "tr", "body",
+                                               "html")):
+                    # Not sure this is the correct name for the parse error
+                    self.parser.parseError(
+                        "expected-one-end-tag-but-got-another",
+                        {"gotName": "body", "expectedName": node.name})
                     break
+        self.parser.phase = self.parser.phases["afterBody"]
 
-        def startTagHead(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
-
-        def startTagOther(self, token):
-            self.anythingElse()
+    def endTagHtml(self, token):
+        # We repeat the test for the body end tag token being ignored here
+        if self.tree.elementInScope("body"):
+            self.endTagBody(impliedTagToken("body"))
             return token
 
-        def endTagHtmlBodyBr(self, token):
-            self.anythingElse()
-            return token
+    def endTagBlock(self, token):
+        # Put us back in the right whitespace handling mode
+        if token["name"] == "pre":
+            self.processSpaceCharacters = self.processSpaceCharactersNonPre
+        inScope = self.tree.elementInScope(token["name"])  # evaluated once, before any popping
+        if inScope:
+            self.tree.generateImpliedEndTags()
+        if self.tree.openElements[-1].name != token["name"]:  # reported even when not in scope
+            self.parser.parseError("end-tag-too-early", {"name": token["name"]})
+        if inScope:
+            node = self.tree.openElements.pop()
+            while node.name != token["name"]:  # pop through the matching element
+                node = self.tree.openElements.pop()
 
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+    def endTagForm(self, token):
+        node = self.tree.formPointer
+        self.tree.formPointer = None  # cleared whether or not the end tag is honoured
+        if node is None or not self.tree.elementInScope(node):
+            self.parser.parseError("unexpected-end-tag",
+                                   {"name": "form"})
+        else:
+            self.tree.generateImpliedEndTags()
+            if self.tree.openElements[-1] != node:
+                self.parser.parseError("end-tag-too-early-ignored",
+                                       {"name": "form"})
+            self.tree.openElements.remove(node)  # only the form is removed; nothing is popped
 
-        def anythingElse(self):
-            self.tree.insertElement(impliedTagToken("body", "StartTag"))
-            self.parser.phase = self.parser.phases["inBody"]
-            self.parser.framesetOK = True
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("body", startTagBody),
-            ("frameset", startTagFrameset),
-            (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
-              "style", "title"),
-             startTagFromHead),
-            ("head", startTagHead)
-        ])
-        startTagHandler.default = startTagOther
-        endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
-                                                  endTagHtmlBodyBr)])
-        endTagHandler.default = endTagOther
-
-    class InBodyPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
-        # the really-really-really-very crazy mode
-        __slots__ = ("processSpaceCharacters",)
-
-        def __init__(self, *args, **kwargs):
-            super(InBodyPhase, self).__init__(*args, **kwargs)
-            # Set this to the default handler
-            self.processSpaceCharacters = self.processSpaceCharactersNonPre
+    def endTagListItem(self, token):
+        if token["name"] == "li":
+            variant = "list"  # </li> is checked with the "list" scope variant
+        else:
+            variant = None  # </dd> and </dt> use the default scope check
+        if not self.tree.elementInScope(token["name"], variant=variant):
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+        else:
+            self.tree.generateImpliedEndTags(exclude=token["name"])
+            if self.tree.openElements[-1].name != token["name"]:
+                self.parser.parseError(
+                    "end-tag-too-early",
+                    {"name": token["name"]})
+            node = self.tree.openElements.pop()
+            while node.name != token["name"]:  # pop through the matching element
+                node = self.tree.openElements.pop()
 
-        def isMatchingFormattingElement(self, node1, node2):
-            return (node1.name == node2.name and
-                    node1.namespace == node2.namespace and
-                    node1.attributes == node2.attributes)
+    def endTagHeading(self, token):
+        for item in headingElements:
+            if self.tree.elementInScope(item):
+                self.tree.generateImpliedEndTags()
+                break
+        if self.tree.openElements[-1].name != token["name"]:
+            self.parser.parseError("end-tag-too-early", {"name": token["name"]})
 
-        # helper
-        def addFormattingElement(self, token):
-            self.tree.insertElement(token)
-            element = self.tree.openElements[-1]
+        for item in headingElements:
+            if self.tree.elementInScope(item):
+                item = self.tree.openElements.pop()
+                while item.name not in headingElements:
+                    item = self.tree.openElements.pop()
+                break
 
-            matchingElements = []
-            for node in self.tree.activeFormattingElements[::-1]:
-                if node is Marker:
-                    break
-                elif self.isMatchingFormattingElement(node, element):
-                    matchingElements.append(node)
-
-            assert len(matchingElements) <= 3
-            if len(matchingElements) == 3:
-                self.tree.activeFormattingElements.remove(matchingElements[-1])
-            self.tree.activeFormattingElements.append(element)
-
-        # the real deal
-        def processEOF(self):
-            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
-                                          "tfoot", "th", "thead", "tr", "body",
-                                          "html"))
-            for node in self.tree.openElements[::-1]:
-                if node.name not in allowed_elements:
-                    self.parser.parseError("expected-closing-tag-but-got-eof")
-                    break
-            # Stop parsing
+    def endTagFormatting(self, token):
+        """The much-feared adoption agency algorithm"""
+        # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
+        # XXX Better parseError messages appreciated.
+
+        # Step 1
+        outerLoopCounter = 0
+
+        # Step 2
+        while outerLoopCounter < 8:
+
+            # Step 3
+            outerLoopCounter += 1
+
+            # Step 4:
+
+            # Let the formatting element be the last element in
+            # the list of active formatting elements that:
+            # - is between the end of the list and the last scope
+            # marker in the list, if any, or the start of the list
+            # otherwise, and
+            # - has the same tag name as the token.
+            formattingElement = self.tree.elementInActiveFormattingElements(
+                token["name"])
+            if (not formattingElement or
+                (formattingElement in self.tree.openElements and
+                 not self.tree.elementInScope(formattingElement.name))):
+                # If there is no such node, then abort these steps
+                # and instead act as described in the "any other
+                # end tag" entry below.
+                self.endTagOther(token)
+                return
 
-        def processSpaceCharactersDropNewline(self, token):
-            # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
-            # want to drop leading newlines
-            data = token["data"]
-            self.processSpaceCharacters = self.processSpaceCharactersNonPre
-            if (data.startswith("\n") and
-                self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
-                    not self.tree.openElements[-1].hasContent()):
-                data = data[1:]
-            if data:
-                self.tree.reconstructActiveFormattingElements()
-                self.tree.insertText(data)
-
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                # The tokenizer should always emit null on its own
+            # Otherwise, if there is such a node, but that node is
+            # not in the stack of open elements, then this is a
+            # parse error; remove the element from the list, and
+            # abort these steps.
+            elif formattingElement not in self.tree.openElements:
+                self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
+                self.tree.activeFormattingElements.remove(formattingElement)
                 return
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertText(token["data"])
-            # This must be bad for performance
-            if (self.parser.framesetOK and
-                any(char not in spaceCharacters
-                    for char in token["data"])):
-                self.parser.framesetOK = False
-
-        def processSpaceCharactersNonPre(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertText(token["data"])
 
-        def startTagProcessInHead(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
+            # Otherwise, if there is such a node, and that node is
+            # also in the stack of open elements, but the element
+            # is not in scope, then this is a parse error; ignore
+            # the token, and abort these steps.
+            elif not self.tree.elementInScope(formattingElement.name):
+                self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
+                return
 
-        def startTagBody(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": "body"})
-            if (len(self.tree.openElements) == 1 or
-                    self.tree.openElements[1].name != "body"):
-                assert self.parser.innerHTML
-            else:
-                self.parser.framesetOK = False
-                for attr, value in token["data"].items():
-                    if attr not in self.tree.openElements[1].attributes:
-                        self.tree.openElements[1].attributes[attr] = value
-
-        def startTagFrameset(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
-            if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"):
-                assert self.parser.innerHTML
-            elif not self.parser.framesetOK:
-                pass
+            # Otherwise, there is a formatting element and that
+            # element is in the stack and is in scope. If the
+            # element is not the current node, this is a parse
+            # error. In any case, proceed with the algorithm as
+            # written in the following steps.
             else:
-                if self.tree.openElements[1].parent:
-                    self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
-                while self.tree.openElements[-1].name != "html":
-                    self.tree.openElements.pop()
-                self.tree.insertElement(token)
-                self.parser.phase = self.parser.phases["inFrameset"]
-
-        def startTagCloseP(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.insertElement(token)
+                if formattingElement != self.tree.openElements[-1]:
+                    self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})
+
+            # Step 5:
+
+            # Let the furthest block be the topmost node in the
+            # stack of open elements that is lower in the stack
+            # than the formatting element, and is an element in
+            # the special category. There might not be one.
+            afeIndex = self.tree.openElements.index(formattingElement)
+            furthestBlock = None
+            for element in self.tree.openElements[afeIndex:]:
+                if element.nameTuple in specialElements:
+                    furthestBlock = element
+                    break
 
-        def startTagPreListing(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.insertElement(token)
-            self.parser.framesetOK = False
-            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
+            # Step 6:
+
+            # If there is no furthest block, then the UA must
+            # first pop all the nodes from the bottom of the stack
+            # of open elements, from the current node up to and
+            # including the formatting element, then remove the
+            # formatting element from the list of active
+            # formatting elements, and finally abort these steps.
+            if furthestBlock is None:
+                element = self.tree.openElements.pop()
+                while element != formattingElement:
+                    element = self.tree.openElements.pop()
+                self.tree.activeFormattingElements.remove(element)
+                return
 
-        def startTagForm(self, token):
-            if self.tree.formPointer:
-                self.parser.parseError("unexpected-start-tag", {"name": "form"})
+            # Step 7
+            commonAncestor = self.tree.openElements[afeIndex - 1]
+
+            # Step 8:
+            # The bookmark is supposed to help us identify where to reinsert
+            # nodes in step 15. We have to ensure that we reinsert nodes after
+            # the node before the active formatting element. Note the bookmark
+            # can move in step 9.7
+            bookmark = self.tree.activeFormattingElements.index(formattingElement)
+
+            # Step 9
+            lastNode = node = furthestBlock
+            innerLoopCounter = 0
+
+            index = self.tree.openElements.index(node)
+            while innerLoopCounter < 3:
+                innerLoopCounter += 1
+                # Node is element before node in open elements
+                index -= 1
+                node = self.tree.openElements[index]
+                if node not in self.tree.activeFormattingElements:
+                    self.tree.openElements.remove(node)
+                    continue
+                # Step 9.6
+                if node == formattingElement:
+                    break
+                # Step 9.7
+                if lastNode == furthestBlock:
+                    bookmark = self.tree.activeFormattingElements.index(node) + 1
+                # Step 9.8
+                clone = node.cloneNode()
+                # Replace node with clone
+                self.tree.activeFormattingElements[
+                    self.tree.activeFormattingElements.index(node)] = clone
+                self.tree.openElements[
+                    self.tree.openElements.index(node)] = clone
+                node = clone
+                # Step 9.9
+                # Remove lastNode from its parents, if any
+                if lastNode.parent:
+                    lastNode.parent.removeChild(lastNode)
+                node.appendChild(lastNode)
+                # Step 9.10
+                lastNode = node
+
+            # Step 10
+            # Foster parent lastNode if commonAncestor is a
+            # table, tbody, tfoot, thead, or tr we need to foster
+            # parent the lastNode
+            if lastNode.parent:
+                lastNode.parent.removeChild(lastNode)
+
+            if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
+                parent, insertBefore = self.tree.getTableMisnestedNodePosition()
+                parent.insertBefore(lastNode, insertBefore)
             else:
-                if self.tree.elementInScope("p", variant="button"):
-                    self.endTagP(impliedTagToken("p"))
-                self.tree.insertElement(token)
-                self.tree.formPointer = self.tree.openElements[-1]
+                commonAncestor.appendChild(lastNode)
 
-        def startTagListItem(self, token):
-            self.parser.framesetOK = False
+            # Step 11
+            clone = formattingElement.cloneNode()
 
-            stopNamesMap = {"li": ["li"],
-                            "dt": ["dt", "dd"],
-                            "dd": ["dt", "dd"]}
-            stopNames = stopNamesMap[token["name"]]
-            for node in reversed(self.tree.openElements):
-                if node.name in stopNames:
-                    self.parser.phase.processEndTag(
-                        impliedTagToken(node.name, "EndTag"))
-                    break
-                if (node.nameTuple in specialElements and
-                        node.name not in ("address", "div", "p")):
-                    break
+            # Step 12
+            furthestBlock.reparentChildren(clone)
 
-            if self.tree.elementInScope("p", variant="button"):
-                self.parser.phase.processEndTag(
-                    impliedTagToken("p", "EndTag"))
+            # Step 13
+            furthestBlock.appendChild(clone)
 
-            self.tree.insertElement(token)
+            # Step 14
+            self.tree.activeFormattingElements.remove(formattingElement)
+            self.tree.activeFormattingElements.insert(bookmark, clone)
 
-        def startTagPlaintext(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.insertElement(token)
-            self.parser.tokenizer.state = self.parser.tokenizer.plaintextState
+            # Step 15
+            self.tree.openElements.remove(formattingElement)
+            self.tree.openElements.insert(
+                self.tree.openElements.index(furthestBlock) + 1, clone)
 
-        def startTagHeading(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            if self.tree.openElements[-1].name in headingElements:
-                self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
-                self.tree.openElements.pop()
-            self.tree.insertElement(token)
+    def endTagAppletMarqueeObject(self, token):
+        if self.tree.elementInScope(token["name"]):
+            self.tree.generateImpliedEndTags()
+        if self.tree.openElements[-1].name != token["name"]:
+            self.parser.parseError("end-tag-too-early", {"name": token["name"]})
 
-        def startTagA(self, token):
-            afeAElement = self.tree.elementInActiveFormattingElements("a")
-            if afeAElement:
-                self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                       {"startName": "a", "endName": "a"})
-                self.endTagFormatting(impliedTagToken("a"))
-                if afeAElement in self.tree.openElements:
-                    self.tree.openElements.remove(afeAElement)
-                if afeAElement in self.tree.activeFormattingElements:
-                    self.tree.activeFormattingElements.remove(afeAElement)
-            self.tree.reconstructActiveFormattingElements()
-            self.addFormattingElement(token)
+        if self.tree.elementInScope(token["name"]):
+            element = self.tree.openElements.pop()
+            while element.name != token["name"]:
+                element = self.tree.openElements.pop()
+            self.tree.clearActiveFormattingElements()
 
-        def startTagFormatting(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.addFormattingElement(token)
+    def endTagBr(self, token):
+        self.parser.parseError("unexpected-end-tag-treated-as",
+                               {"originalName": "br", "newName": "br element"})
+        self.tree.reconstructActiveFormattingElements()
+        self.tree.insertElement(impliedTagToken("br", "StartTag"))  # </br> acts like <br>
+        self.tree.openElements.pop()  # take the top entry off the open-elements stack again
 
-        def startTagNobr(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            if self.tree.elementInScope("nobr"):
-                self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                       {"startName": "nobr", "endName": "nobr"})
-                self.processEndTag(impliedTagToken("nobr"))
-                # XXX Need tests that trigger the following
-                self.tree.reconstructActiveFormattingElements()
-            self.addFormattingElement(token)
-
-        def startTagButton(self, token):
-            if self.tree.elementInScope("button"):
-                self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                       {"startName": "button", "endName": "button"})
-                self.processEndTag(impliedTagToken("button"))
-                return token
+    def endTagOther(self, token):
+        for node in self.tree.openElements[::-1]:
+            if node.name == token["name"]:
+                self.tree.generateImpliedEndTags(exclude=token["name"])
+                if self.tree.openElements[-1].name != token["name"]:
+                    self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+                while self.tree.openElements.pop() != node:
+                    pass
+                break
             else:
-                self.tree.reconstructActiveFormattingElements()
-                self.tree.insertElement(token)
-                self.parser.framesetOK = False
+                if node.nameTuple in specialElements:
+                    self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+                    break
 
-        def startTagAppletMarqueeObject(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-            self.tree.activeFormattingElements.append(Marker)
-            self.parser.framesetOK = False
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        (("base", "basefont", "bgsound", "command", "link", "meta",
+          "script", "style", "title"),
+         startTagProcessInHead),
+        ("body", startTagBody),
+        ("frameset", startTagFrameset),
+        (("address", "article", "aside", "blockquote", "center", "details",
+          "dir", "div", "dl", "fieldset", "figcaption", "figure",
+          "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
+          "section", "summary", "ul"),
+         startTagCloseP),
+        (headingElements, startTagHeading),
+        (("pre", "listing"), startTagPreListing),
+        ("form", startTagForm),
+        (("li", "dd", "dt"), startTagListItem),
+        ("plaintext", startTagPlaintext),
+        ("a", startTagA),
+        (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
+          "strong", "tt", "u"), startTagFormatting),
+        ("nobr", startTagNobr),
+        ("button", startTagButton),
+        (("applet", "marquee", "object"), startTagAppletMarqueeObject),
+        ("xmp", startTagXmp),
+        ("table", startTagTable),
+        (("area", "br", "embed", "img", "keygen", "wbr"),
+         startTagVoidFormatting),
+        (("param", "source", "track"), startTagParamSource),
+        ("input", startTagInput),
+        ("hr", startTagHr),
+        ("image", startTagImage),
+        ("isindex", startTagIsIndex),
+        ("textarea", startTagTextarea),
+        ("iframe", startTagIFrame),
+        ("noscript", startTagNoscript),
+        (("noembed", "noframes"), startTagRawtext),
+        ("select", startTagSelect),
+        (("rp", "rt"), startTagRpRt),
+        (("option", "optgroup"), startTagOpt),
+        (("math"), startTagMath),  # NB: ("math") is a plain string, not a 1-tuple
+        (("svg"), startTagSvg),  # NB: ("svg") is a plain string, not a 1-tuple
+        (("caption", "col", "colgroup", "frame", "head",
+          "tbody", "td", "tfoot", "th", "thead",
+          "tr"), startTagMisplaced)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        ("body", endTagBody),
+        ("html", endTagHtml),
+        (("address", "article", "aside", "blockquote", "button", "center",
+          "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+          "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
+          "section", "summary", "ul"), endTagBlock),
+        ("form", endTagForm),
+        ("p", endTagP),
+        (("dd", "dt", "li"), endTagListItem),
+        (headingElements, endTagHeading),
+        (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
+          "strike", "strong", "tt", "u"), endTagFormatting),
+        (("applet", "marquee", "object"), endTagAppletMarqueeObject),
+        ("br", endTagBr),
+    ])
+    endTagHandler.default = endTagOther
+
+
+class TextPhase(Phase):
+    __slots__ = tuple()
+
+    def processCharacters(self, token):
+        self.tree.insertText(token["data"])
+
+    def processEOF(self):
+        self.parser.parseError("expected-named-closing-tag-but-got-eof",
+                               {"name": self.tree.openElements[-1].name})
+        self.tree.openElements.pop()
+        self.parser.phase = self.parser.originalPhase
+        return True
+
+    def startTagOther(self, token):
+        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']
+
+    def endTagScript(self, token):
+        node = self.tree.openElements.pop()
+        assert node.name == "script"
+        self.parser.phase = self.parser.originalPhase
+        # The rest of this method is all stuff that only happens if
+        # document.write works
+
+    def endTagOther(self, token):
+        self.tree.openElements.pop()
+        self.parser.phase = self.parser.originalPhase
+
+    startTagHandler = _utils.MethodDispatcher([])
+    startTagHandler.default = startTagOther
+    endTagHandler = _utils.MethodDispatcher([
+        ("script", endTagScript)])
+    endTagHandler.default = endTagOther
+
+
+class InTablePhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
+    __slots__ = tuple()
+
+    # helper methods
+    def clearStackToTableContext(self):
+        # "clear the stack back to a table context"
+        while self.tree.openElements[-1].name not in ("table", "html"):
+            # self.parser.parseError("unexpected-implied-end-tag-in-table",
+            #  {"name":  self.tree.openElements[-1].name})
+            self.tree.openElements.pop()
+        # When the current node is <html> it's an innerHTML case
 
-        def startTagXmp(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.framesetOK = False
-            self.parser.parseRCDataRawtext(token, "RAWTEXT")
+    # processing methods
+    def processEOF(self):
+        if self.tree.openElements[-1].name != "html":
+            self.parser.parseError("eof-in-table")
+        else:
+            assert self.parser.innerHTML
+        # Stop parsing
+
+    def processSpaceCharacters(self, token):
+        originalPhase = self.parser.phase
+        self.parser.phase = self.parser.phases["inTableText"]
+        self.parser.phase.originalPhase = originalPhase
+        self.parser.phase.processSpaceCharacters(token)
+
+    def processCharacters(self, token):
+        originalPhase = self.parser.phase
+        self.parser.phase = self.parser.phases["inTableText"]
+        self.parser.phase.originalPhase = originalPhase
+        self.parser.phase.processCharacters(token)
+
+    def insertText(self, token):
+        # If we get here there must be at least one non-whitespace character
+        # Do the table magic!
+        self.tree.insertFromTable = True
+        self.parser.phases["inBody"].processCharacters(token)
+        self.tree.insertFromTable = False
+
+    def startTagCaption(self, token):
+        self.clearStackToTableContext()
+        self.tree.activeFormattingElements.append(Marker)
+        self.tree.insertElement(token)
+        self.parser.phase = self.parser.phases["inCaption"]
 
-        def startTagTable(self, token):
-            if self.parser.compatMode != "quirks":
-                if self.tree.elementInScope("p", variant="button"):
-                    self.processEndTag(impliedTagToken("p"))
-            self.tree.insertElement(token)
-            self.parser.framesetOK = False
-            self.parser.phase = self.parser.phases["inTable"]
+    def startTagColgroup(self, token):
+        self.clearStackToTableContext()
+        self.tree.insertElement(token)
+        self.parser.phase = self.parser.phases["inColumnGroup"]
 
-        def startTagVoidFormatting(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-            self.parser.framesetOK = False
+    def startTagCol(self, token):
+        self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
+        return token
 
-        def startTagInput(self, token):
-            framesetOK = self.parser.framesetOK
-            self.startTagVoidFormatting(token)
-            if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
-                # input type=hidden doesn't change framesetOK
-                self.parser.framesetOK = framesetOK
+    def startTagRowGroup(self, token):
+        self.clearStackToTableContext()
+        self.tree.insertElement(token)
+        self.parser.phase = self.parser.phases["inTableBody"]
 
-        def startTagParamSource(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
+    def startTagImplyTbody(self, token):
+        self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
+        return token
 
-        def startTagHr(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
+    def startTagTable(self, token):
+        self.parser.parseError("unexpected-start-tag-implies-end-tag",
+                               {"startName": "table", "endName": "table"})
+        self.parser.phase.processEndTag(impliedTagToken("table"))
+        if not self.parser.innerHTML:
+            return token
+
+    def startTagStyleScript(self, token):
+        return self.parser.phases["inHead"].processStartTag(token)
+
+    def startTagInput(self, token):
+        if ("type" in token["data"] and
+                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+            self.parser.parseError("unexpected-hidden-input-in-table")
             self.tree.insertElement(token)
+            # XXX associate with form
             self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-            self.parser.framesetOK = False
+        else:
+            self.startTagOther(token)
 
-        def startTagImage(self, token):
-            # No really...
-            self.parser.parseError("unexpected-start-tag-treated-as",
-                                   {"originalName": "image", "newName": "img"})
-            self.processStartTag(impliedTagToken("img", "StartTag",
-                                                 attributes=token["data"],
-                                                 selfClosing=token["selfClosing"]))
-
-        def startTagIsIndex(self, token):
-            self.parser.parseError("deprecated-tag", {"name": "isindex"})
-            if self.tree.formPointer:
-                return
-            form_attrs = {}
-            if "action" in token["data"]:
-                form_attrs["action"] = token["data"]["action"]
-            self.processStartTag(impliedTagToken("form", "StartTag",
-                                                 attributes=form_attrs))
-            self.processStartTag(impliedTagToken("hr", "StartTag"))
-            self.processStartTag(impliedTagToken("label", "StartTag"))
-            # XXX Localization ...
-            if "prompt" in token["data"]:
-                prompt = token["data"]["prompt"]
-            else:
-                prompt = "This is a searchable index. Enter search keywords: "
-            self.processCharacters(
-                {"type": tokenTypes["Characters"], "data": prompt})
-            attributes = token["data"].copy()
-            if "action" in attributes:
-                del attributes["action"]
-            if "prompt" in attributes:
-                del attributes["prompt"]
-            attributes["name"] = "isindex"
-            self.processStartTag(impliedTagToken("input", "StartTag",
-                                                 attributes=attributes,
-                                                 selfClosing=token["selfClosing"]))
-            self.processEndTag(impliedTagToken("label"))
-            self.processStartTag(impliedTagToken("hr", "StartTag"))
-            self.processEndTag(impliedTagToken("form"))
-
-        def startTagTextarea(self, token):
+    def startTagForm(self, token):
+        self.parser.parseError("unexpected-form-in-table")
+        if self.tree.formPointer is None:
             self.tree.insertElement(token)
-            self.parser.tokenizer.state = self.parser.tokenizer.rcdataState
-            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
-            self.parser.framesetOK = False
+            self.tree.formPointer = self.tree.openElements[-1]
+            self.tree.openElements.pop()
 
-        def startTagIFrame(self, token):
-            self.parser.framesetOK = False
-            self.startTagRawtext(token)
+    def startTagOther(self, token):
+        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
+        # Do the table magic!
+        self.tree.insertFromTable = True
+        self.parser.phases["inBody"].processStartTag(token)
+        self.tree.insertFromTable = False
+
+    def endTagTable(self, token):
+        if self.tree.elementInScope("table", variant="table"):
+            self.tree.generateImpliedEndTags()
+            if self.tree.openElements[-1].name != "table":
+                self.parser.parseError("end-tag-too-early-named",
+                                       {"gotName": "table",
+                                        "expectedName": self.tree.openElements[-1].name})
+            while self.tree.openElements[-1].name != "table":
+                self.tree.openElements.pop()
+            self.tree.openElements.pop()
+            self.parser.resetInsertionMode()
+        else:
+            # innerHTML case
+            assert self.parser.innerHTML
+            self.parser.parseError()
 
-        def startTagNoscript(self, token):
-            if self.parser.scripting:
-                self.startTagRawtext(token)
-            else:
-                self.startTagOther(token)
+    def endTagIgnore(self, token):
+        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
+        # Do the table magic!
+        self.tree.insertFromTable = True
+        self.parser.phases["inBody"].processEndTag(token)
+        self.tree.insertFromTable = False
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        ("caption", startTagCaption),
+        ("colgroup", startTagColgroup),
+        ("col", startTagCol),
+        (("tbody", "tfoot", "thead"), startTagRowGroup),
+        (("td", "th", "tr"), startTagImplyTbody),
+        ("table", startTagTable),
+        (("style", "script"), startTagStyleScript),
+        ("input", startTagInput),
+        ("form", startTagForm)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        ("table", endTagTable),
+        (("body", "caption", "col", "colgroup", "html", "tbody", "td",
+          "tfoot", "th", "thead", "tr"), endTagIgnore)
+    ])
+    endTagHandler.default = endTagOther
+
+
+class InTableTextPhase(Phase):
+    __slots__ = ("originalPhase", "characterTokens")
+
+    def __init__(self, *args, **kwargs):
+        super(InTableTextPhase, self).__init__(*args, **kwargs)
+        self.originalPhase = None
+        self.characterTokens = []
+
+    def flushCharacters(self):
+        data = "".join([item["data"] for item in self.characterTokens])
+        if any(item not in spaceCharacters for item in data):
+            token = {"type": tokenTypes["Characters"], "data": data}
+            self.parser.phases["inTable"].insertText(token)
+        elif data:
+            self.tree.insertText(data)
+        self.characterTokens = []
+
+    def processComment(self, token):
+        self.flushCharacters()
+        self.parser.phase = self.originalPhase
+        return token
+
+    def processEOF(self):
+        self.flushCharacters()
+        self.parser.phase = self.originalPhase
+        return True
+
+    def processCharacters(self, token):
+        if token["data"] == "\u0000":
+            return
+        self.characterTokens.append(token)
+
+    def processSpaceCharacters(self, token):
+        # pretty sure we should never reach here
+        self.characterTokens.append(token)
+#        assert False
+
+    def processStartTag(self, token):
+        self.flushCharacters()
+        self.parser.phase = self.originalPhase
+        return token
+
+    def processEndTag(self, token):
+        self.flushCharacters()
+        self.parser.phase = self.originalPhase
+        return token
+
+
+class InCaptionPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
+    __slots__ = tuple()
+
+    def ignoreEndTagCaption(self):
+        return not self.tree.elementInScope("caption", variant="table")
+
+    def processEOF(self):
+        self.parser.phases["inBody"].processEOF()
+
+    def processCharacters(self, token):
+        return self.parser.phases["inBody"].processCharacters(token)
+
+    def startTagTableElement(self, token):
+        self.parser.parseError()
+        # XXX Have to duplicate logic here to find out if the tag is ignored
+        ignoreEndTag = self.ignoreEndTagCaption()
+        self.parser.phase.processEndTag(impliedTagToken("caption"))
+        if not ignoreEndTag:
+            return token
 
-        def startTagRawtext(self, token):
-            """iframe, noembed noframes, noscript(if scripting enabled)"""
-            self.parser.parseRCDataRawtext(token, "RAWTEXT")
-
-        def startTagOpt(self, token):
-            if self.tree.openElements[-1].name == "option":
-                self.parser.phase.processEndTag(impliedTagToken("option"))
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.tree.insertElement(token)
-
-        def startTagSelect(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-            self.parser.framesetOK = False
-            if self.parser.phase in (self.parser.phases["inTable"],
-                                     self.parser.phases["inCaption"],
-                                     self.parser.phases["inColumnGroup"],
-                                     self.parser.phases["inTableBody"],
-                                     self.parser.phases["inRow"],
-                                     self.parser.phases["inCell"]):
-                self.parser.phase = self.parser.phases["inSelectInTable"]
-            else:
-                self.parser.phase = self.parser.phases["inSelect"]
-
-        def startTagRpRt(self, token):
-            if self.tree.elementInScope("ruby"):
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1].name != "ruby":
-                    self.parser.parseError()
-            self.tree.insertElement(token)
-
-        def startTagMath(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.adjustMathMLAttributes(token)
-            self.parser.adjustForeignAttributes(token)
-            token["namespace"] = namespaces["mathml"]
-            self.tree.insertElement(token)
-            # Need to get the parse error right for the case where the token
-            # has a namespace not equal to the xmlns attribute
-            if token["selfClosing"]:
+    def startTagOther(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
+
+    def endTagCaption(self, token):
+        if not self.ignoreEndTagCaption():
+            # AT this code is quite similar to endTagTable in "InTable"
+            self.tree.generateImpliedEndTags()
+            if self.tree.openElements[-1].name != "caption":
+                self.parser.parseError("expected-one-end-tag-but-got-another",
+                                       {"gotName": "caption",
+                                        "expectedName": self.tree.openElements[-1].name})
+            while self.tree.openElements[-1].name != "caption":
                 self.tree.openElements.pop()
-                token["selfClosingAcknowledged"] = True
-
-        def startTagSvg(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.adjustSVGAttributes(token)
-            self.parser.adjustForeignAttributes(token)
-            token["namespace"] = namespaces["svg"]
-            self.tree.insertElement(token)
-            # Need to get the parse error right for the case where the token
-            # has a namespace not equal to the xmlns attribute
-            if token["selfClosing"]:
-                self.tree.openElements.pop()
-                token["selfClosingAcknowledged"] = True
-
-        def startTagMisplaced(self, token):
-            """ Elements that should be children of other elements that have a
-            different insertion mode; here they are ignored
-            "caption", "col", "colgroup", "frame", "frameset", "head",
-            "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
-            "tr", "noscript"
-            """
-            self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})
-
-        def startTagOther(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-
-        def endTagP(self, token):
-            if not self.tree.elementInScope("p", variant="button"):
-                self.startTagCloseP(impliedTagToken("p", "StartTag"))
-                self.parser.parseError("unexpected-end-tag", {"name": "p"})
-                self.endTagP(impliedTagToken("p", "EndTag"))
-            else:
-                self.tree.generateImpliedEndTags("p")
-                if self.tree.openElements[-1].name != "p":
-                    self.parser.parseError("unexpected-end-tag", {"name": "p"})
-                node = self.tree.openElements.pop()
-                while node.name != "p":
-                    node = self.tree.openElements.pop()
-
-        def endTagBody(self, token):
-            if not self.tree.elementInScope("body"):
-                self.parser.parseError()
-                return
-            elif self.tree.openElements[-1].name != "body":
-                for node in self.tree.openElements[2:]:
-                    if node.name not in frozenset(("dd", "dt", "li", "optgroup",
-                                                   "option", "p", "rp", "rt",
-                                                   "tbody", "td", "tfoot",
-                                                   "th", "thead", "tr", "body",
-                                                   "html")):
-                        # Not sure this is the correct name for the parse error
-                        self.parser.parseError(
-                            "expected-one-end-tag-but-got-another",
-                            {"gotName": "body", "expectedName": node.name})
-                        break
-            self.parser.phase = self.parser.phases["afterBody"]
-
-        def endTagHtml(self, token):
-            # We repeat the test for the body end tag token being ignored here
-            if self.tree.elementInScope("body"):
-                self.endTagBody(impliedTagToken("body"))
-                return token
-
-        def endTagBlock(self, token):
-            # Put us back in the right whitespace handling mode
-            if token["name"] == "pre":
-                self.processSpaceCharacters = self.processSpaceCharactersNonPre
-            inScope = self.tree.elementInScope(token["name"])
-            if inScope:
-                self.tree.generateImpliedEndTags()
-            if self.tree.openElements[-1].name != token["name"]:
-                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
-            if inScope:
-                node = self.tree.openElements.pop()
-                while node.name != token["name"]:
-                    node = self.tree.openElements.pop()
-
-        def endTagForm(self, token):
-            node = self.tree.formPointer
-            self.tree.formPointer = None
-            if node is None or not self.tree.elementInScope(node):
-                self.parser.parseError("unexpected-end-tag",
-                                       {"name": "form"})
-            else:
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1] != node:
-                    self.parser.parseError("end-tag-too-early-ignored",
-                                           {"name": "form"})
-                self.tree.openElements.remove(node)
-
-        def endTagListItem(self, token):
-            if token["name"] == "li":
-                variant = "list"
-            else:
-                variant = None
-            if not self.tree.elementInScope(token["name"], variant=variant):
-                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-            else:
-                self.tree.generateImpliedEndTags(exclude=token["name"])
-                if self.tree.openElements[-1].name != token["name"]:
-                    self.parser.parseError(
-                        "end-tag-too-early",
-                        {"name": token["name"]})
-                node = self.tree.openElements.pop()
-                while node.name != token["name"]:
-                    node = self.tree.openElements.pop()
-
-        def endTagHeading(self, token):
-            for item in headingElements:
-                if self.tree.elementInScope(item):
-                    self.tree.generateImpliedEndTags()
-                    break
-            if self.tree.openElements[-1].name != token["name"]:
-                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
-
-            for item in headingElements:
-                if self.tree.elementInScope(item):
-                    item = self.tree.openElements.pop()
-                    while item.name not in headingElements:
-                        item = self.tree.openElements.pop()
-                    break
-
-        def endTagFormatting(self, token):
-            """The much-feared adoption agency algorithm"""
-            # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
-            # XXX Better parseError messages appreciated.
-
-            # Step 1
-            outerLoopCounter = 0
-
-            # Step 2
-            while outerLoopCounter < 8:
-
-                # Step 3
-                outerLoopCounter += 1
-
-                # Step 4:
-
-                # Let the formatting element be the last element in
-                # the list of active formatting elements that:
-                # - is between the end of the list and the last scope
-                # marker in the list, if any, or the start of the list
-                # otherwise, and
-                # - has the same tag name as the token.
-                formattingElement = self.tree.elementInActiveFormattingElements(
-                    token["name"])
-                if (not formattingElement or
-                    (formattingElement in self.tree.openElements and
-                     not self.tree.elementInScope(formattingElement.name))):
-                    # If there is no such node, then abort these steps
-                    # and instead act as described in the "any other
-                    # end tag" entry below.
-                    self.endTagOther(token)
-                    return
-
-                # Otherwise, if there is such a node, but that node is
-                # not in the stack of open elements, then this is a
-                # parse error; remove the element from the list, and
-                # abort these steps.
-                elif formattingElement not in self.tree.openElements:
-                    self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
-                    self.tree.activeFormattingElements.remove(formattingElement)
-                    return
-
-                # Otherwise, if there is such a node, and that node is
-                # also in the stack of open elements, but the element
-                # is not in scope, then this is a parse error; ignore
-                # the token, and abort these steps.
-                elif not self.tree.elementInScope(formattingElement.name):
-                    self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
-                    return
-
-                # Otherwise, there is a formatting element and that
-                # element is in the stack and is in scope. If the
-                # element is not the current node, this is a parse
-                # error. In any case, proceed with the algorithm as
-                # written in the following steps.
-                else:
-                    if formattingElement != self.tree.openElements[-1]:
-                        self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})
-
-                # Step 5:
-
-                # Let the furthest block be the topmost node in the
-                # stack of open elements that is lower in the stack
-                # than the formatting element, and is an element in
-                # the special category. There might not be one.
-                afeIndex = self.tree.openElements.index(formattingElement)
-                furthestBlock = None
-                for element in self.tree.openElements[afeIndex:]:
-                    if element.nameTuple in specialElements:
-                        furthestBlock = element
-                        break
-
-                # Step 6:
-
-                # If there is no furthest block, then the UA must
-                # first pop all the nodes from the bottom of the stack
-                # of open elements, from the current node up to and
-                # including the formatting element, then remove the
-                # formatting element from the list of active
-                # formatting elements, and finally abort these steps.
-                if furthestBlock is None:
-                    element = self.tree.openElements.pop()
-                    while element != formattingElement:
-                        element = self.tree.openElements.pop()
-                    self.tree.activeFormattingElements.remove(element)
-                    return
-
-                # Step 7
-                commonAncestor = self.tree.openElements[afeIndex - 1]
-
-                # Step 8:
-                # The bookmark is supposed to help us identify where to reinsert
-                # nodes in step 15. We have to ensure that we reinsert nodes after
-                # the node before the active formatting element. Note the bookmark
-                # can move in step 9.7
-                bookmark = self.tree.activeFormattingElements.index(formattingElement)
-
-                # Step 9
-                lastNode = node = furthestBlock
-                innerLoopCounter = 0
-
-                index = self.tree.openElements.index(node)
-                while innerLoopCounter < 3:
-                    innerLoopCounter += 1
-                    # Node is element before node in open elements
-                    index -= 1
-                    node = self.tree.openElements[index]
-                    if node not in self.tree.activeFormattingElements:
-                        self.tree.openElements.remove(node)
-                        continue
-                    # Step 9.6
-                    if node == formattingElement:
-                        break
-                    # Step 9.7
-                    if lastNode == furthestBlock:
-                        bookmark = self.tree.activeFormattingElements.index(node) + 1
-                    # Step 9.8
-                    clone = node.cloneNode()
-                    # Replace node with clone
-                    self.tree.activeFormattingElements[
-                        self.tree.activeFormattingElements.index(node)] = clone
-                    self.tree.openElements[
-                        self.tree.openElements.index(node)] = clone
-                    node = clone
-                    # Step 9.9
-                    # Remove lastNode from its parents, if any
-                    if lastNode.parent:
-                        lastNode.parent.removeChild(lastNode)
-                    node.appendChild(lastNode)
-                    # Step 9.10
-                    lastNode = node
-
-                # Step 10
-                # Foster parent lastNode if commonAncestor is a
-                # table, tbody, tfoot, thead, or tr we need to foster
-                # parent the lastNode
-                if lastNode.parent:
-                    lastNode.parent.removeChild(lastNode)
-
-                if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
-                    parent, insertBefore = self.tree.getTableMisnestedNodePosition()
-                    parent.insertBefore(lastNode, insertBefore)
-                else:
-                    commonAncestor.appendChild(lastNode)
-
-                # Step 11
-                clone = formattingElement.cloneNode()
-
-                # Step 12
-                furthestBlock.reparentChildren(clone)
-
-                # Step 13
-                furthestBlock.appendChild(clone)
-
-                # Step 14
-                self.tree.activeFormattingElements.remove(formattingElement)
-                self.tree.activeFormattingElements.insert(bookmark, clone)
-
-                # Step 15
-                self.tree.openElements.remove(formattingElement)
-                self.tree.openElements.insert(
-                    self.tree.openElements.index(furthestBlock) + 1, clone)
-
-        def endTagAppletMarqueeObject(self, token):
-            if self.tree.elementInScope(token["name"]):
-                self.tree.generateImpliedEndTags()
-            if self.tree.openElements[-1].name != token["name"]:
-                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
-
-            if self.tree.elementInScope(token["name"]):
-                element = self.tree.openElements.pop()
-                while element.name != token["name"]:
-                    element = self.tree.openElements.pop()
-                self.tree.clearActiveFormattingElements()
-
-        def endTagBr(self, token):
-            self.parser.parseError("unexpected-end-tag-treated-as",
-                                   {"originalName": "br", "newName": "br element"})
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(impliedTagToken("br", "StartTag"))
             self.tree.openElements.pop()
+            self.tree.clearActiveFormattingElements()
+            self.parser.phase = self.parser.phases["inTable"]
+        else:
+            # innerHTML case
+            assert self.parser.innerHTML
+            self.parser.parseError()
 
-        def endTagOther(self, token):
-            for node in self.tree.openElements[::-1]:
-                if node.name == token["name"]:
-                    self.tree.generateImpliedEndTags(exclude=token["name"])
-                    if self.tree.openElements[-1].name != token["name"]:
-                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-                    while self.tree.openElements.pop() != node:
-                        pass
-                    break
-                else:
-                    if node.nameTuple in specialElements:
-                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-                        break
+    def endTagTable(self, token):
+        self.parser.parseError()
+        ignoreEndTag = self.ignoreEndTagCaption()
+        self.parser.phase.processEndTag(impliedTagToken("caption"))
+        if not ignoreEndTag:
+            return token
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("base", "basefont", "bgsound", "command", "link", "meta",
-              "script", "style", "title"),
-             startTagProcessInHead),
-            ("body", startTagBody),
-            ("frameset", startTagFrameset),
-            (("address", "article", "aside", "blockquote", "center", "details",
-              "dir", "div", "dl", "fieldset", "figcaption", "figure",
-              "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
-              "section", "summary", "ul"),
-             startTagCloseP),
-            (headingElements, startTagHeading),
-            (("pre", "listing"), startTagPreListing),
-            ("form", startTagForm),
-            (("li", "dd", "dt"), startTagListItem),
-            ("plaintext", startTagPlaintext),
-            ("a", startTagA),
-            (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
-              "strong", "tt", "u"), startTagFormatting),
-            ("nobr", startTagNobr),
-            ("button", startTagButton),
-            (("applet", "marquee", "object"), startTagAppletMarqueeObject),
-            ("xmp", startTagXmp),
-            ("table", startTagTable),
-            (("area", "br", "embed", "img", "keygen", "wbr"),
-             startTagVoidFormatting),
-            (("param", "source", "track"), startTagParamSource),
-            ("input", startTagInput),
-            ("hr", startTagHr),
-            ("image", startTagImage),
-            ("isindex", startTagIsIndex),
-            ("textarea", startTagTextarea),
-            ("iframe", startTagIFrame),
-            ("noscript", startTagNoscript),
-            (("noembed", "noframes"), startTagRawtext),
-            ("select", startTagSelect),
-            (("rp", "rt"), startTagRpRt),
-            (("option", "optgroup"), startTagOpt),
-            (("math"), startTagMath),
-            (("svg"), startTagSvg),
-            (("caption", "col", "colgroup", "frame", "head",
-              "tbody", "td", "tfoot", "th", "thead",
-              "tr"), startTagMisplaced)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("body", endTagBody),
-            ("html", endTagHtml),
-            (("address", "article", "aside", "blockquote", "button", "center",
-              "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
-              "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
-              "section", "summary", "ul"), endTagBlock),
-            ("form", endTagForm),
-            ("p", endTagP),
-            (("dd", "dt", "li"), endTagListItem),
-            (headingElements, endTagHeading),
-            (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
-              "strike", "strong", "tt", "u"), endTagFormatting),
-            (("applet", "marquee", "object"), endTagAppletMarqueeObject),
-            ("br", endTagBr),
-        ])
-        endTagHandler.default = endTagOther
-
-    class TextPhase(Phase):
-        __slots__ = tuple()
-
-        def processCharacters(self, token):
-            self.tree.insertText(token["data"])
-
-        def processEOF(self):
-            self.parser.parseError("expected-named-closing-tag-but-got-eof",
-                                   {"name": self.tree.openElements[-1].name})
-            self.tree.openElements.pop()
-            self.parser.phase = self.parser.originalPhase
-            return True
+    def endTagIgnore(self, token):
+        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
-        def startTagOther(self, token):
-            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']
+    def endTagOther(self, token):
+        return self.parser.phases["inBody"].processEndTag(token)
 
-        def endTagScript(self, token):
-            node = self.tree.openElements.pop()
-            assert node.name == "script"
-            self.parser.phase = self.parser.originalPhase
-            # The rest of this method is all stuff that only happens if
-            # document.write works
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+          "thead", "tr"), startTagTableElement)
+    ])
+    startTagHandler.default = startTagOther
 
-        def endTagOther(self, token):
-            self.tree.openElements.pop()
-            self.parser.phase = self.parser.originalPhase
-
-        startTagHandler = _utils.MethodDispatcher([])
-        startTagHandler.default = startTagOther
-        endTagHandler = _utils.MethodDispatcher([
-            ("script", endTagScript)])
-        endTagHandler.default = endTagOther
-
-    class InTablePhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
-        __slots__ = tuple()
-
-        # helper methods
-        def clearStackToTableContext(self):
-            # "clear the stack back to a table context"
-            while self.tree.openElements[-1].name not in ("table", "html"):
-                # self.parser.parseError("unexpected-implied-end-tag-in-table",
-                #  {"name":  self.tree.openElements[-1].name})
-                self.tree.openElements.pop()
-            # When the current node is <html> it's an innerHTML case
+    endTagHandler = _utils.MethodDispatcher([
+        ("caption", endTagCaption),
+        ("table", endTagTable),
+        (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
+          "thead", "tr"), endTagIgnore)
+    ])
+    endTagHandler.default = endTagOther
 
-        # processing methods
-        def processEOF(self):
-            if self.tree.openElements[-1].name != "html":
-                self.parser.parseError("eof-in-table")
-            else:
-                assert self.parser.innerHTML
-            # Stop parsing
-
-        def processSpaceCharacters(self, token):
-            originalPhase = self.parser.phase
-            self.parser.phase = self.parser.phases["inTableText"]
-            self.parser.phase.originalPhase = originalPhase
-            self.parser.phase.processSpaceCharacters(token)
-
-        def processCharacters(self, token):
-            originalPhase = self.parser.phase
-            self.parser.phase = self.parser.phases["inTableText"]
-            self.parser.phase.originalPhase = originalPhase
-            self.parser.phase.processCharacters(token)
-
-        def insertText(self, token):
-            # If we get here there must be at least one non-whitespace character
-            # Do the table magic!
-            self.tree.insertFromTable = True
-            self.parser.phases["inBody"].processCharacters(token)
-            self.tree.insertFromTable = False
-
-        def startTagCaption(self, token):
-            self.clearStackToTableContext()
-            self.tree.activeFormattingElements.append(Marker)
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inCaption"]
 
-        def startTagColgroup(self, token):
-            self.clearStackToTableContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inColumnGroup"]
+class InColumnGroupPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
+    __slots__ = tuple()
 
-        def startTagCol(self, token):
-            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
-            return token
+    def ignoreEndTagColgroup(self):
+        return self.tree.openElements[-1].name == "html"
 
-        def startTagRowGroup(self, token):
-            self.clearStackToTableContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inTableBody"]
+    def processEOF(self):
+        if self.tree.openElements[-1].name == "html":
+            assert self.parser.innerHTML
+            return
+        else:
+            ignoreEndTag = self.ignoreEndTagColgroup()
+            self.endTagColgroup(impliedTagToken("colgroup"))
+            if not ignoreEndTag:
+                return True
 
-        def startTagImplyTbody(self, token):
-            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
+    def processCharacters(self, token):
+        ignoreEndTag = self.ignoreEndTagColgroup()
+        self.endTagColgroup(impliedTagToken("colgroup"))
+        if not ignoreEndTag:
             return token
 
-        def startTagTable(self, token):
-            self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                   {"startName": "table", "endName": "table"})
-            self.parser.phase.processEndTag(impliedTagToken("table"))
-            if not self.parser.innerHTML:
-                return token
-
-        def startTagStyleScript(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
-
-        def startTagInput(self, token):
-            if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
-                self.parser.parseError("unexpected-hidden-input-in-table")
-                self.tree.insertElement(token)
-                # XXX associate with form
-                self.tree.openElements.pop()
-            else:
-                self.startTagOther(token)
-
-        def startTagForm(self, token):
-            self.parser.parseError("unexpected-form-in-table")
-            if self.tree.formPointer is None:
-                self.tree.insertElement(token)
-                self.tree.formPointer = self.tree.openElements[-1]
-                self.tree.openElements.pop()
-
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
-            # Do the table magic!
-            self.tree.insertFromTable = True
-            self.parser.phases["inBody"].processStartTag(token)
-            self.tree.insertFromTable = False
-
-        def endTagTable(self, token):
-            if self.tree.elementInScope("table", variant="table"):
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1].name != "table":
-                    self.parser.parseError("end-tag-too-early-named",
-                                           {"gotName": "table",
-                                            "expectedName": self.tree.openElements[-1].name})
-                while self.tree.openElements[-1].name != "table":
-                    self.tree.openElements.pop()
-                self.tree.openElements.pop()
-                self.parser.resetInsertionMode()
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+    def startTagCol(self, token):
+        self.tree.insertElement(token)
+        self.tree.openElements.pop()
+        token["selfClosingAcknowledged"] = True
 
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
-            # Do the table magic!
-            self.tree.insertFromTable = True
-            self.parser.phases["inBody"].processEndTag(token)
-            self.tree.insertFromTable = False
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("caption", startTagCaption),
-            ("colgroup", startTagColgroup),
-            ("col", startTagCol),
-            (("tbody", "tfoot", "thead"), startTagRowGroup),
-            (("td", "th", "tr"), startTagImplyTbody),
-            ("table", startTagTable),
-            (("style", "script"), startTagStyleScript),
-            ("input", startTagInput),
-            ("form", startTagForm)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("table", endTagTable),
-            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
-              "tfoot", "th", "thead", "tr"), endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InTableTextPhase(Phase):
-        __slots__ = ("originalPhase", "characterTokens")
-
-        def __init__(self, *args, **kwargs):
-            super(InTableTextPhase, self).__init__(*args, **kwargs)
-            self.originalPhase = None
-            self.characterTokens = []
-
-        def flushCharacters(self):
-            data = "".join([item["data"] for item in self.characterTokens])
-            if any(item not in spaceCharacters for item in data):
-                token = {"type": tokenTypes["Characters"], "data": data}
-                self.parser.phases["inTable"].insertText(token)
-            elif data:
-                self.tree.insertText(data)
-            self.characterTokens = []
-
-        def processComment(self, token):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
+    def startTagOther(self, token):
+        ignoreEndTag = self.ignoreEndTagColgroup()
+        self.endTagColgroup(impliedTagToken("colgroup"))
+        if not ignoreEndTag:
             return token
 
-        def processEOF(self):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
-            return True
-
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                return
-            self.characterTokens.append(token)
-
-        def processSpaceCharacters(self, token):
-            # pretty sure we should never reach here
-            self.characterTokens.append(token)
-    #        assert False
+    def endTagColgroup(self, token):
+        if self.ignoreEndTagColgroup():
+            # innerHTML case
+            assert self.parser.innerHTML
+            self.parser.parseError()
+        else:
+            self.tree.openElements.pop()
+            self.parser.phase = self.parser.phases["inTable"]
 
-        def processStartTag(self, token):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
-            return token
+    def endTagCol(self, token):
+        self.parser.parseError("no-end-tag", {"name": "col"})
 
-        def processEndTag(self, token):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
+    def endTagOther(self, token):
+        ignoreEndTag = self.ignoreEndTagColgroup()
+        self.endTagColgroup(impliedTagToken("colgroup"))
+        if not ignoreEndTag:
             return token
 
-    class InCaptionPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
-        __slots__ = tuple()
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        ("col", startTagCol)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        ("colgroup", endTagColgroup),
+        ("col", endTagCol)
+    ])
+    endTagHandler.default = endTagOther
+
+
+class InTableBodyPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
+    __slots__ = tuple()
+
+    # helper methods
+    def clearStackToTableBodyContext(self):
+        while self.tree.openElements[-1].name not in ("tbody", "tfoot",
+                                                      "thead", "html"):
+            # self.parser.parseError("unexpected-implied-end-tag-in-table",
+            #  {"name": self.tree.openElements[-1].name})
+            self.tree.openElements.pop()
+        if self.tree.openElements[-1].name == "html":
+            assert self.parser.innerHTML
 
-        def ignoreEndTagCaption(self):
-            return not self.tree.elementInScope("caption", variant="table")
+    # the rest
+    def processEOF(self):
+        self.parser.phases["inTable"].processEOF()
 
-        def processEOF(self):
-            self.parser.phases["inBody"].processEOF()
+    def processSpaceCharacters(self, token):
+        return self.parser.phases["inTable"].processSpaceCharacters(token)
 
-        def processCharacters(self, token):
-            return self.parser.phases["inBody"].processCharacters(token)
+    def processCharacters(self, token):
+        return self.parser.phases["inTable"].processCharacters(token)
 
-        def startTagTableElement(self, token):
+    def startTagTr(self, token):
+        self.clearStackToTableBodyContext()
+        self.tree.insertElement(token)
+        self.parser.phase = self.parser.phases["inRow"]
+
+    def startTagTableCell(self, token):
+        self.parser.parseError("unexpected-cell-in-table-body",
+                               {"name": token["name"]})
+        self.startTagTr(impliedTagToken("tr", "StartTag"))
+        return token
+
+    def startTagTableOther(self, token):
+        # XXX AT Any ideas on how to share this with endTagTable?
+        if (self.tree.elementInScope("tbody", variant="table") or
+            self.tree.elementInScope("thead", variant="table") or
+                self.tree.elementInScope("tfoot", variant="table")):
+            self.clearStackToTableBodyContext()
+            self.endTagTableRowGroup(
+                impliedTagToken(self.tree.openElements[-1].name))
+            return token
+        else:
+            # innerHTML case
+            assert self.parser.innerHTML
             self.parser.parseError()
-            # XXX Have to duplicate logic here to find out if the tag is ignored
-            ignoreEndTag = self.ignoreEndTagCaption()
-            self.parser.phase.processEndTag(impliedTagToken("caption"))
-            if not ignoreEndTag:
-                return token
 
-        def startTagOther(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    def startTagOther(self, token):
+        return self.parser.phases["inTable"].processStartTag(token)
 
-        def endTagCaption(self, token):
-            if not self.ignoreEndTagCaption():
-                # AT this code is quite similar to endTagTable in "InTable"
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1].name != "caption":
-                    self.parser.parseError("expected-one-end-tag-but-got-another",
-                                           {"gotName": "caption",
-                                            "expectedName": self.tree.openElements[-1].name})
-                while self.tree.openElements[-1].name != "caption":
-                    self.tree.openElements.pop()
-                self.tree.openElements.pop()
-                self.tree.clearActiveFormattingElements()
-                self.parser.phase = self.parser.phases["inTable"]
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
+    def endTagTableRowGroup(self, token):
+        if self.tree.elementInScope(token["name"], variant="table"):
+            self.clearStackToTableBodyContext()
+            self.tree.openElements.pop()
+            self.parser.phase = self.parser.phases["inTable"]
+        else:
+            self.parser.parseError("unexpected-end-tag-in-table-body",
+                                   {"name": token["name"]})
 
-        def endTagTable(self, token):
+    def endTagTable(self, token):
+        if (self.tree.elementInScope("tbody", variant="table") or
+            self.tree.elementInScope("thead", variant="table") or
+                self.tree.elementInScope("tfoot", variant="table")):
+            self.clearStackToTableBodyContext()
+            self.endTagTableRowGroup(
+                impliedTagToken(self.tree.openElements[-1].name))
+            return token
+        else:
+            # innerHTML case
+            assert self.parser.innerHTML
             self.parser.parseError()
-            ignoreEndTag = self.ignoreEndTagCaption()
-            self.parser.phase.processEndTag(impliedTagToken("caption"))
-            if not ignoreEndTag:
-                return token
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def endTagOther(self, token):
-            return self.parser.phases["inBody"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-              "thead", "tr"), startTagTableElement)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("caption", endTagCaption),
-            ("table", endTagTable),
-            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
-              "thead", "tr"), endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InColumnGroupPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-column
-        __slots__ = tuple()
-
-        def ignoreEndTagColgroup(self):
-            return self.tree.openElements[-1].name == "html"
-
-        def processEOF(self):
-            if self.tree.openElements[-1].name == "html":
-                assert self.parser.innerHTML
-                return
-            else:
-                ignoreEndTag = self.ignoreEndTagColgroup()
-                self.endTagColgroup(impliedTagToken("colgroup"))
-                if not ignoreEndTag:
-                    return True
 
-        def processCharacters(self, token):
-            ignoreEndTag = self.ignoreEndTagColgroup()
-            self.endTagColgroup(impliedTagToken("colgroup"))
-            if not ignoreEndTag:
-                return token
-
-        def startTagCol(self, token):
-            self.tree.insertElement(token)
+    def endTagIgnore(self, token):
+        self.parser.parseError("unexpected-end-tag-in-table-body",
+                               {"name": token["name"]})
+
+    def endTagOther(self, token):
+        return self.parser.phases["inTable"].processEndTag(token)
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        ("tr", startTagTr),
+        (("td", "th"), startTagTableCell),
+        (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
+         startTagTableOther)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
+        ("table", endTagTable),
+        (("body", "caption", "col", "colgroup", "html", "td", "th",
+          "tr"), endTagIgnore)
+    ])
+    endTagHandler.default = endTagOther
+
+
+class InRowPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
+    __slots__ = tuple()
+
+    # helper methods (XXX unify this with other table helper methods)
+    def clearStackToTableRowContext(self):
+        while self.tree.openElements[-1].name not in ("tr", "html"):
+            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
+                                   {"name": self.tree.openElements[-1].name})
             self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
 
-        def startTagOther(self, token):
-            ignoreEndTag = self.ignoreEndTagColgroup()
-            self.endTagColgroup(impliedTagToken("colgroup"))
-            if not ignoreEndTag:
-                return token
+    def ignoreEndTagTr(self):
+        return not self.tree.elementInScope("tr", variant="table")
 
-        def endTagColgroup(self, token):
-            if self.ignoreEndTagColgroup():
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-            else:
-                self.tree.openElements.pop()
-                self.parser.phase = self.parser.phases["inTable"]
+    # the rest
+    def processEOF(self):
+        self.parser.phases["inTable"].processEOF()
 
-        def endTagCol(self, token):
-            self.parser.parseError("no-end-tag", {"name": "col"})
+    def processSpaceCharacters(self, token):
+        return self.parser.phases["inTable"].processSpaceCharacters(token)
 
-        def endTagOther(self, token):
-            ignoreEndTag = self.ignoreEndTagColgroup()
-            self.endTagColgroup(impliedTagToken("colgroup"))
-            if not ignoreEndTag:
-                return token
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("col", startTagCol)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("colgroup", endTagColgroup),
-            ("col", endTagCol)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InTableBodyPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
-        __slots__ = tuple()
-
-        # helper methods
-        def clearStackToTableBodyContext(self):
-            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
-                                                          "thead", "html"):
-                # self.parser.parseError("unexpected-implied-end-tag-in-table",
-                #  {"name": self.tree.openElements[-1].name})
-                self.tree.openElements.pop()
-            if self.tree.openElements[-1].name == "html":
-                assert self.parser.innerHTML
+    def processCharacters(self, token):
+        return self.parser.phases["inTable"].processCharacters(token)
 
-        # the rest
-        def processEOF(self):
-            self.parser.phases["inTable"].processEOF()
+    def startTagTableCell(self, token):
+        self.clearStackToTableRowContext()
+        self.tree.insertElement(token)
+        self.parser.phase = self.parser.phases["inCell"]
+        self.tree.activeFormattingElements.append(Marker)
+
+    def startTagTableOther(self, token):
+        ignoreEndTag = self.ignoreEndTagTr()
+        self.endTagTr(impliedTagToken("tr"))
+        # XXX how are we sure it's always ignored in the innerHTML case?
+        if not ignoreEndTag:
+            return token
 
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inTable"].processSpaceCharacters(token)
+    def startTagOther(self, token):
+        return self.parser.phases["inTable"].processStartTag(token)
 
-        def processCharacters(self, token):
-            return self.parser.phases["inTable"].processCharacters(token)
+    def endTagTr(self, token):
+        if not self.ignoreEndTagTr():
+            self.clearStackToTableRowContext()
+            self.tree.openElements.pop()
+            self.parser.phase = self.parser.phases["inTableBody"]
+        else:
+            # innerHTML case
+            assert self.parser.innerHTML
+            self.parser.parseError()
 
-        def startTagTr(self, token):
-            self.clearStackToTableBodyContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inRow"]
+    def endTagTable(self, token):
+        ignoreEndTag = self.ignoreEndTagTr()
+        self.endTagTr(impliedTagToken("tr"))
+        # Reprocess the current tag if the tr end tag was not ignored
+        # XXX how are we sure it's always ignored in the innerHTML case?
+        if not ignoreEndTag:
+            return token
 
-        def startTagTableCell(self, token):
-            self.parser.parseError("unexpected-cell-in-table-body",
-                                   {"name": token["name"]})
-            self.startTagTr(impliedTagToken("tr", "StartTag"))
+    def endTagTableRowGroup(self, token):
+        if self.tree.elementInScope(token["name"], variant="table"):
+            self.endTagTr(impliedTagToken("tr"))
             return token
+        else:
+            self.parser.parseError()
 
-        def startTagTableOther(self, token):
-            # XXX AT Any ideas on how to share this with endTagTable?
-            if (self.tree.elementInScope("tbody", variant="table") or
-                self.tree.elementInScope("thead", variant="table") or
-                    self.tree.elementInScope("tfoot", variant="table")):
-                self.clearStackToTableBodyContext()
-                self.endTagTableRowGroup(
-                    impliedTagToken(self.tree.openElements[-1].name))
-                return token
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
+    def endTagIgnore(self, token):
+        self.parser.parseError("unexpected-end-tag-in-table-row",
+                               {"name": token["name"]})
+
+    def endTagOther(self, token):
+        return self.parser.phases["inTable"].processEndTag(token)
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        (("td", "th"), startTagTableCell),
+        (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
+          "tr"), startTagTableOther)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        ("tr", endTagTr),
+        ("table", endTagTable),
+        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
+        (("body", "caption", "col", "colgroup", "html", "td", "th"),
+         endTagIgnore)
+    ])
+    endTagHandler.default = endTagOther
+
+
+class InCellPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
+    __slots__ = tuple()
+
+    # helper
+    def closeCell(self):
+        if self.tree.elementInScope("td", variant="table"):
+            self.endTagTableCell(impliedTagToken("td"))
+        elif self.tree.elementInScope("th", variant="table"):
+            self.endTagTableCell(impliedTagToken("th"))
+
+    # the rest
+    def processEOF(self):
+        self.parser.phases["inBody"].processEOF()
+
+    def processCharacters(self, token):
+        return self.parser.phases["inBody"].processCharacters(token)
+
+    def startTagTableOther(self, token):
+        if (self.tree.elementInScope("td", variant="table") or
+                self.tree.elementInScope("th", variant="table")):
+            self.closeCell()
+            return token
+        else:
+            # innerHTML case
+            assert self.parser.innerHTML
+            self.parser.parseError()
 
-        def startTagOther(self, token):
-            return self.parser.phases["inTable"].processStartTag(token)
+    def startTagOther(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
 
-        def endTagTableRowGroup(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.clearStackToTableBodyContext()
-                self.tree.openElements.pop()
-                self.parser.phase = self.parser.phases["inTable"]
-            else:
-                self.parser.parseError("unexpected-end-tag-in-table-body",
+    def endTagTableCell(self, token):
+        if self.tree.elementInScope(token["name"], variant="table"):
+            self.tree.generateImpliedEndTags(token["name"])
+            if self.tree.openElements[-1].name != token["name"]:
+                self.parser.parseError("unexpected-cell-end-tag",
                                        {"name": token["name"]})
-
-        def endTagTable(self, token):
-            if (self.tree.elementInScope("tbody", variant="table") or
-                self.tree.elementInScope("thead", variant="table") or
-                    self.tree.elementInScope("tfoot", variant="table")):
-                self.clearStackToTableBodyContext()
-                self.endTagTableRowGroup(
-                    impliedTagToken(self.tree.openElements[-1].name))
-                return token
+                while True:
+                    node = self.tree.openElements.pop()
+                    if node.name == token["name"]:
+                        break
             else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag-in-table-body",
-                                   {"name": token["name"]})
-
-        def endTagOther(self, token):
-            return self.parser.phases["inTable"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("tr", startTagTr),
-            (("td", "th"), startTagTableCell),
-            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
-             startTagTableOther)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
-            ("table", endTagTable),
-            (("body", "caption", "col", "colgroup", "html", "td", "th",
-              "tr"), endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InRowPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
-        __slots__ = tuple()
-
-        # helper methods (XXX unify this with other table helper methods)
-        def clearStackToTableRowContext(self):
-            while self.tree.openElements[-1].name not in ("tr", "html"):
-                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
-                                       {"name": self.tree.openElements[-1].name})
-                self.tree.openElements.pop()
-
-        def ignoreEndTagTr(self):
-            return not self.tree.elementInScope("tr", variant="table")
-
-        # the rest
-        def processEOF(self):
-            self.parser.phases["inTable"].processEOF()
-
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inTable"].processSpaceCharacters(token)
-
-        def processCharacters(self, token):
-            return self.parser.phases["inTable"].processCharacters(token)
-
-        def startTagTableCell(self, token):
-            self.clearStackToTableRowContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inCell"]
-            self.tree.activeFormattingElements.append(Marker)
-
-        def startTagTableOther(self, token):
-            ignoreEndTag = self.ignoreEndTagTr()
-            self.endTagTr(impliedTagToken("tr"))
-            # XXX how are we sure it's always ignored in the innerHTML case?
-            if not ignoreEndTag:
-                return token
-
-        def startTagOther(self, token):
-            return self.parser.phases["inTable"].processStartTag(token)
-
-        def endTagTr(self, token):
-            if not self.ignoreEndTagTr():
-                self.clearStackToTableRowContext()
                 self.tree.openElements.pop()
-                self.parser.phase = self.parser.phases["inTableBody"]
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
+            self.tree.clearActiveFormattingElements()
+            self.parser.phase = self.parser.phases["inRow"]
+        else:
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
-        def endTagTable(self, token):
-            ignoreEndTag = self.ignoreEndTagTr()
-            self.endTagTr(impliedTagToken("tr"))
-            # Reprocess the current tag if the tr end tag was not ignored
-            # XXX how are we sure it's always ignored in the innerHTML case?
-            if not ignoreEndTag:
-                return token
+    def endTagIgnore(self, token):
+        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
-        def endTagTableRowGroup(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.endTagTr(impliedTagToken("tr"))
-                return token
-            else:
-                self.parser.parseError()
+    def endTagImply(self, token):
+        if self.tree.elementInScope(token["name"], variant="table"):
+            self.closeCell()
+            return token
+        else:
+            # sometimes innerHTML case
+            self.parser.parseError()
 
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag-in-table-row",
-                                   {"name": token["name"]})
+    def endTagOther(self, token):
+        return self.parser.phases["inBody"].processEndTag(token)
 
-        def endTagOther(self, token):
-            return self.parser.phases["inTable"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("td", "th"), startTagTableCell),
-            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
-              "tr"), startTagTableOther)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("tr", endTagTr),
-            ("table", endTagTable),
-            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
-            (("body", "caption", "col", "colgroup", "html", "td", "th"),
-             endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InCellPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
-        __slots__ = tuple()
-
-        # helper
-        def closeCell(self):
-            if self.tree.elementInScope("td", variant="table"):
-                self.endTagTableCell(impliedTagToken("td"))
-            elif self.tree.elementInScope("th", variant="table"):
-                self.endTagTableCell(impliedTagToken("th"))
-
-        # the rest
-        def processEOF(self):
-            self.parser.phases["inBody"].processEOF()
-
-        def processCharacters(self, token):
-            return self.parser.phases["inBody"].processCharacters(token)
-
-        def startTagTableOther(self, token):
-            if (self.tree.elementInScope("td", variant="table") or
-                    self.tree.elementInScope("th", variant="table")):
-                self.closeCell()
-                return token
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+          "thead", "tr"), startTagTableOther)
+    ])
+    startTagHandler.default = startTagOther
 
-        def startTagOther(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    endTagHandler = _utils.MethodDispatcher([
+        (("td", "th"), endTagTableCell),
+        (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
+        (("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
+    ])
+    endTagHandler.default = endTagOther
 
-        def endTagTableCell(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.tree.generateImpliedEndTags(token["name"])
-                if self.tree.openElements[-1].name != token["name"]:
-                    self.parser.parseError("unexpected-cell-end-tag",
-                                           {"name": token["name"]})
-                    while True:
-                        node = self.tree.openElements.pop()
-                        if node.name == token["name"]:
-                            break
-                else:
-                    self.tree.openElements.pop()
-                self.tree.clearActiveFormattingElements()
-                self.parser.phase = self.parser.phases["inRow"]
-            else:
-                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+class InSelectPhase(Phase):
+    __slots__ = tuple()
 
-        def endTagImply(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.closeCell()
-                return token
-            else:
-                # sometimes innerHTML case
-                self.parser.parseError()
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
+    def processEOF(self):
+        if self.tree.openElements[-1].name != "html":
+            self.parser.parseError("eof-in-select")
+        else:
+            assert self.parser.innerHTML
 
-        def endTagOther(self, token):
-            return self.parser.phases["inBody"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-              "thead", "tr"), startTagTableOther)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            (("td", "th"), endTagTableCell),
-            (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
-            (("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InSelectPhase(Phase):
-        __slots__ = tuple()
-
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-select
-        def processEOF(self):
-            if self.tree.openElements[-1].name != "html":
-                self.parser.parseError("eof-in-select")
-            else:
-                assert self.parser.innerHTML
+    def processCharacters(self, token):
+        if token["data"] == "\u0000":
+            return
+        self.tree.insertText(token["data"])
 
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                return
-            self.tree.insertText(token["data"])
+    def startTagOption(self, token):
+        # We need to imply </option> if <option> is the current node.
+        if self.tree.openElements[-1].name == "option":
+            self.tree.openElements.pop()
+        self.tree.insertElement(token)
 
-        def startTagOption(self, token):
-            # We need to imply </option> if <option> is the current node.
-            if self.tree.openElements[-1].name == "option":
-                self.tree.openElements.pop()
-            self.tree.insertElement(token)
+    def startTagOptgroup(self, token):
+        if self.tree.openElements[-1].name == "option":
+            self.tree.openElements.pop()
+        if self.tree.openElements[-1].name == "optgroup":
+            self.tree.openElements.pop()
+        self.tree.insertElement(token)
 
-        def startTagOptgroup(self, token):
-            if self.tree.openElements[-1].name == "option":
-                self.tree.openElements.pop()
-            if self.tree.openElements[-1].name == "optgroup":
-                self.tree.openElements.pop()
-            self.tree.insertElement(token)
+    def startTagSelect(self, token):
+        self.parser.parseError("unexpected-select-in-select")
+        self.endTagSelect(impliedTagToken("select"))
 
-        def startTagSelect(self, token):
-            self.parser.parseError("unexpected-select-in-select")
+    def startTagInput(self, token):
+        self.parser.parseError("unexpected-input-in-select")
+        if self.tree.elementInScope("select", variant="select"):
             self.endTagSelect(impliedTagToken("select"))
+            return token
+        else:
+            assert self.parser.innerHTML
 
-        def startTagInput(self, token):
-            self.parser.parseError("unexpected-input-in-select")
-            if self.tree.elementInScope("select", variant="select"):
-                self.endTagSelect(impliedTagToken("select"))
-                return token
-            else:
-                assert self.parser.innerHTML
-
-        def startTagScript(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
+    def startTagScript(self, token):
+        return self.parser.phases["inHead"].processStartTag(token)
 
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-in-select",
-                                   {"name": token["name"]})
+    def startTagOther(self, token):
+        self.parser.parseError("unexpected-start-tag-in-select",
+                               {"name": token["name"]})
 
-        def endTagOption(self, token):
-            if self.tree.openElements[-1].name == "option":
-                self.tree.openElements.pop()
-            else:
-                self.parser.parseError("unexpected-end-tag-in-select",
-                                       {"name": "option"})
+    def endTagOption(self, token):
+        if self.tree.openElements[-1].name == "option":
+            self.tree.openElements.pop()
+        else:
+            self.parser.parseError("unexpected-end-tag-in-select",
+                                   {"name": "option"})
 
-        def endTagOptgroup(self, token):
-            # </optgroup> implicitly closes <option>
-            if (self.tree.openElements[-1].name == "option" and
-                    self.tree.openElements[-2].name == "optgroup"):
-                self.tree.openElements.pop()
-            # It also closes </optgroup>
-            if self.tree.openElements[-1].name == "optgroup":
-                self.tree.openElements.pop()
-            # But nothing else
-            else:
-                self.parser.parseError("unexpected-end-tag-in-select",
-                                       {"name": "optgroup"})
+    def endTagOptgroup(self, token):
+        # </optgroup> implicitly closes <option>
+        if (self.tree.openElements[-1].name == "option" and
+                self.tree.openElements[-2].name == "optgroup"):
+            self.tree.openElements.pop()
+        # It also closes </optgroup>
+        if self.tree.openElements[-1].name == "optgroup":
+            self.tree.openElements.pop()
+        # But nothing else
+        else:
+            self.parser.parseError("unexpected-end-tag-in-select",
+                                   {"name": "optgroup"})
 
-        def endTagSelect(self, token):
-            if self.tree.elementInScope("select", variant="select"):
+    def endTagSelect(self, token):
+        if self.tree.elementInScope("select", variant="select"):
+            node = self.tree.openElements.pop()
+            while node.name != "select":
                 node = self.tree.openElements.pop()
-                while node.name != "select":
-                    node = self.tree.openElements.pop()
-                self.parser.resetInsertionMode()
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-in-select",
-                                   {"name": token["name"]})
+            self.parser.resetInsertionMode()
+        else:
+            # innerHTML case
+            assert self.parser.innerHTML
+            self.parser.parseError()
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("option", startTagOption),
-            ("optgroup", startTagOptgroup),
-            ("select", startTagSelect),
-            (("input", "keygen", "textarea"), startTagInput),
-            ("script", startTagScript)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("option", endTagOption),
-            ("optgroup", endTagOptgroup),
-            ("select", endTagSelect)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InSelectInTablePhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            self.parser.phases["inSelect"].processEOF()
-
-        def processCharacters(self, token):
-            return self.parser.phases["inSelect"].processCharacters(token)
-
-        def startTagTable(self, token):
-            self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
-            self.endTagOther(impliedTagToken("select"))
-            return token
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag-in-select",
+                               {"name": token["name"]})
 
-        def startTagOther(self, token):
-            return self.parser.phases["inSelect"].processStartTag(token)
-
-        def endTagTable(self, token):
-            self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.endTagOther(impliedTagToken("select"))
-                return token
-
-        def endTagOther(self, token):
-            return self.parser.phases["inSelect"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-             startTagTable)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-             endTagTable)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InForeignContentPhase(Phase):
-        __slots__ = tuple()
-
-        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
-                                      "center", "code", "dd", "div", "dl", "dt",
-                                      "em", "embed", "h1", "h2", "h3",
-                                      "h4", "h5", "h6", "head", "hr", "i", "img",
-                                      "li", "listing", "menu", "meta", "nobr",
-                                      "ol", "p", "pre", "ruby", "s", "small",
-                                      "span", "strong", "strike", "sub", "sup",
-                                      "table", "tt", "u", "ul", "var"])
-
-        def adjustSVGTagNames(self, token):
-            replacements = {"altglyph": "altGlyph",
-                            "altglyphdef": "altGlyphDef",
-                            "altglyphitem": "altGlyphItem",
-                            "animatecolor": "animateColor",
-                            "animatemotion": "animateMotion",
-                            "animatetransform": "animateTransform",
-                            "clippath": "clipPath",
-                            "feblend": "feBlend",
-                            "fecolormatrix": "feColorMatrix",
-                            "fecomponenttransfer": "feComponentTransfer",
-                            "fecomposite": "feComposite",
-                            "feconvolvematrix": "feConvolveMatrix",
-                            "fediffuselighting": "feDiffuseLighting",
-                            "fedisplacementmap": "feDisplacementMap",
-                            "fedistantlight": "feDistantLight",
-                            "feflood": "feFlood",
-                            "fefunca": "feFuncA",
-                            "fefuncb": "feFuncB",
-                            "fefuncg": "feFuncG",
-                            "fefuncr": "feFuncR",
-                            "fegaussianblur": "feGaussianBlur",
-                            "feimage": "feImage",
-                            "femerge": "feMerge",
-                            "femergenode": "feMergeNode",
-                            "femorphology": "feMorphology",
-                            "feoffset": "feOffset",
-                            "fepointlight": "fePointLight",
-                            "fespecularlighting": "feSpecularLighting",
-                            "fespotlight": "feSpotLight",
-                            "fetile": "feTile",
-                            "feturbulence": "feTurbulence",
-                            "foreignobject": "foreignObject",
-                            "glyphref": "glyphRef",
-                            "lineargradient": "linearGradient",
-                            "radialgradient": "radialGradient",
-                            "textpath": "textPath"}
-
-            if token["name"] in replacements:
-                token["name"] = replacements[token["name"]]
-
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                token["data"] = "\uFFFD"
-            elif (self.parser.framesetOK and
-                  any(char not in spaceCharacters for char in token["data"])):
-                self.parser.framesetOK = False
-            Phase.processCharacters(self, token)
-
-        def processStartTag(self, token):
-            currentNode = self.tree.openElements[-1]
-            if (token["name"] in self.breakoutElements or
-                (token["name"] == "font" and
-                 set(token["data"].keys()) & {"color", "face", "size"})):
-                self.parser.parseError("unexpected-html-element-in-foreign-content",
-                                       {"name": token["name"]})
-                while (self.tree.openElements[-1].namespace !=
-                       self.tree.defaultNamespace and
-                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
-                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
-                    self.tree.openElements.pop()
-                return token
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        ("option", startTagOption),
+        ("optgroup", startTagOptgroup),
+        ("select", startTagSelect),
+        (("input", "keygen", "textarea"), startTagInput),
+        ("script", startTagScript)
+    ])
+    startTagHandler.default = startTagOther
 
-            else:
-                if currentNode.namespace == namespaces["mathml"]:
-                    self.parser.adjustMathMLAttributes(token)
-                elif currentNode.namespace == namespaces["svg"]:
-                    self.adjustSVGTagNames(token)
-                    self.parser.adjustSVGAttributes(token)
-                self.parser.adjustForeignAttributes(token)
-                token["namespace"] = currentNode.namespace
-                self.tree.insertElement(token)
-                if token["selfClosing"]:
-                    self.tree.openElements.pop()
-                    token["selfClosingAcknowledged"] = True
-
-        def processEndTag(self, token):
-            nodeIndex = len(self.tree.openElements) - 1
-            node = self.tree.openElements[-1]
-            if node.name.translate(asciiUpper2Lower) != token["name"]:
-                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-            while True:
-                if node.name.translate(asciiUpper2Lower) == token["name"]:
-                    # XXX this isn't in the spec but it seems necessary
-                    if self.parser.phase == self.parser.phases["inTableText"]:
-                        self.parser.phase.flushCharacters()
-                        self.parser.phase = self.parser.phase.originalPhase
-                    while self.tree.openElements.pop() != node:
-                        assert self.tree.openElements
-                    new_token = None
-                    break
-                nodeIndex -= 1
+    endTagHandler = _utils.MethodDispatcher([
+        ("option", endTagOption),
+        ("optgroup", endTagOptgroup),
+        ("select", endTagSelect)
+    ])
+    endTagHandler.default = endTagOther
 
-                node = self.tree.openElements[nodeIndex]
-                if node.namespace != self.tree.defaultNamespace:
-                    continue
-                else:
-                    new_token = self.parser.phase.processEndTag(token)
-                    break
-            return new_token
 
-    class AfterBodyPhase(Phase):
-        __slots__ = tuple()
+class InSelectInTablePhase(Phase):
+    __slots__ = tuple()
 
-        def processEOF(self):
-            # Stop parsing
-            pass
+    def processEOF(self):
+        self.parser.phases["inSelect"].processEOF()
 
-        def processComment(self, token):
-            # This is needed because data is to be appended to the <html> element
-            # here and not to whatever is currently open.
-            self.tree.insertComment(token, self.tree.openElements[0])
+    def processCharacters(self, token):
+        return self.parser.phases["inSelect"].processCharacters(token)
 
-        def processCharacters(self, token):
-            self.parser.parseError("unexpected-char-after-body")
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
+    def startTagTable(self, token):
+        self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
+        self.endTagOther(impliedTagToken("select"))
+        return token
 
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    def startTagOther(self, token):
+        return self.parser.phases["inSelect"].processStartTag(token)
 
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-after-body",
-                                   {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
+    def endTagTable(self, token):
+        self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
+        if self.tree.elementInScope(token["name"], variant="table"):
+            self.endTagOther(impliedTagToken("select"))
             return token
 
-        def endTagHtml(self, name):
-            if self.parser.innerHTML:
-                self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
-            else:
-                self.parser.phase = self.parser.phases["afterAfterBody"]
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-after-body",
+    def endTagOther(self, token):
+        return self.parser.phases["inSelect"].processEndTag(token)
+
+    startTagHandler = _utils.MethodDispatcher([
+        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+         startTagTable)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+         endTagTable)
+    ])
+    endTagHandler.default = endTagOther
+
+
+class InForeignContentPhase(Phase):
+    __slots__ = tuple()
+
+    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
+                                  "center", "code", "dd", "div", "dl", "dt",
+                                  "em", "embed", "h1", "h2", "h3",
+                                  "h4", "h5", "h6", "head", "hr", "i", "img",
+                                  "li", "listing", "menu", "meta", "nobr",
+                                  "ol", "p", "pre", "ruby", "s", "small",
+                                  "span", "strong", "strike", "sub", "sup",
+                                  "table", "tt", "u", "ul", "var"])
+
+    def adjustSVGTagNames(self, token):
+        replacements = {"altglyph": "altGlyph",
+                        "altglyphdef": "altGlyphDef",
+                        "altglyphitem": "altGlyphItem",
+                        "animatecolor": "animateColor",
+                        "animatemotion": "animateMotion",
+                        "animatetransform": "animateTransform",
+                        "clippath": "clipPath",
+                        "feblend": "feBlend",
+                        "fecolormatrix": "feColorMatrix",
+                        "fecomponenttransfer": "feComponentTransfer",
+                        "fecomposite": "feComposite",
+                        "feconvolvematrix": "feConvolveMatrix",
+                        "fediffuselighting": "feDiffuseLighting",
+                        "fedisplacementmap": "feDisplacementMap",
+                        "fedistantlight": "feDistantLight",
+                        "feflood": "feFlood",
+                        "fefunca": "feFuncA",
+                        "fefuncb": "feFuncB",
+                        "fefuncg": "feFuncG",
+                        "fefuncr": "feFuncR",
+                        "fegaussianblur": "feGaussianBlur",
+                        "feimage": "feImage",
+                        "femerge": "feMerge",
+                        "femergenode": "feMergeNode",
+                        "femorphology": "feMorphology",
+                        "feoffset": "feOffset",
+                        "fepointlight": "fePointLight",
+                        "fespecularlighting": "feSpecularLighting",
+                        "fespotlight": "feSpotLight",
+                        "fetile": "feTile",
+                        "feturbulence": "feTurbulence",
+                        "foreignobject": "foreignObject",
+                        "glyphref": "glyphRef",
+                        "lineargradient": "linearGradient",
+                        "radialgradient": "radialGradient",
+                        "textpath": "textPath"}
+
+        if token["name"] in replacements:
+            token["name"] = replacements[token["name"]]
+
+    def processCharacters(self, token):
+        if token["data"] == "\u0000":
+            token["data"] = "\uFFFD"
+        elif (self.parser.framesetOK and
+              any(char not in spaceCharacters for char in token["data"])):
+            self.parser.framesetOK = False
+        Phase.processCharacters(self, token)
+
+    def processStartTag(self, token):
+        currentNode = self.tree.openElements[-1]
+        if (token["name"] in self.breakoutElements or
+            (token["name"] == "font" and
+             set(token["data"].keys()) & {"color", "face", "size"})):
+            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                    {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
+            while (self.tree.openElements[-1].namespace !=
+                   self.tree.defaultNamespace and
+                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
+                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
+                self.tree.openElements.pop()
             return token
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml)
-        ])
-        startTagHandler.default = startTagOther
+        else:
+            if currentNode.namespace == namespaces["mathml"]:
+                self.parser.adjustMathMLAttributes(token)
+            elif currentNode.namespace == namespaces["svg"]:
+                self.adjustSVGTagNames(token)
+                self.parser.adjustSVGAttributes(token)
+            self.parser.adjustForeignAttributes(token)
+            token["namespace"] = currentNode.namespace
+            self.tree.insertElement(token)
+            if token["selfClosing"]:
+                self.tree.openElements.pop()
+                token["selfClosingAcknowledged"] = True
 
-        endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
-        endTagHandler.default = endTagOther
+    def processEndTag(self, token):
+        nodeIndex = len(self.tree.openElements) - 1
+        node = self.tree.openElements[-1]
+        if node.name.translate(asciiUpper2Lower) != token["name"]:
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
-    class InFramesetPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
-        __slots__ = tuple()
+        while True:
+            if node.name.translate(asciiUpper2Lower) == token["name"]:
+                # XXX this isn't in the spec but it seems necessary
+                if self.parser.phase == self.parser.phases["inTableText"]:
+                    self.parser.phase.flushCharacters()
+                    self.parser.phase = self.parser.phase.originalPhase
+                while self.tree.openElements.pop() != node:
+                    assert self.tree.openElements
+                new_token = None
+                break
+            nodeIndex -= 1
 
-        def processEOF(self):
-            if self.tree.openElements[-1].name != "html":
-                self.parser.parseError("eof-in-frameset")
+            node = self.tree.openElements[nodeIndex]
+            if node.namespace != self.tree.defaultNamespace:
+                continue
             else:
-                assert self.parser.innerHTML
+                new_token = self.parser.phase.processEndTag(token)
+                break
+        return new_token
 
-        def processCharacters(self, token):
-            self.parser.parseError("unexpected-char-in-frameset")
 
-        def startTagFrameset(self, token):
-            self.tree.insertElement(token)
+class AfterBodyPhase(Phase):
+    __slots__ = tuple()
 
-        def startTagFrame(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
+    def processEOF(self):
+        # Stop parsing
+        pass
 
-        def startTagNoframes(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    def processComment(self, token):
+        # This is needed because data is to be appended to the <html> element
+        # here and not to whatever is currently open.
+        self.tree.insertComment(token, self.tree.openElements[0])
 
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-in-frameset",
-                                   {"name": token["name"]})
+    def processCharacters(self, token):
+        self.parser.parseError("unexpected-char-after-body")
+        self.parser.phase = self.parser.phases["inBody"]
+        return token
 
-        def endTagFrameset(self, token):
-            if self.tree.openElements[-1].name == "html":
-                # innerHTML case
-                self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
-            else:
-                self.tree.openElements.pop()
-            if (not self.parser.innerHTML and
-                    self.tree.openElements[-1].name != "frameset"):
-                # If we're not in innerHTML mode and the current node is not a
-                # "frameset" element (anymore) then switch.
-                self.parser.phase = self.parser.phases["afterFrameset"]
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-in-frameset",
-                                   {"name": token["name"]})
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("frameset", startTagFrameset),
-            ("frame", startTagFrame),
-            ("noframes", startTagNoframes)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("frameset", endTagFrameset)
-        ])
-        endTagHandler.default = endTagOther
-
-    class AfterFramesetPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#after3
-        __slots__ = tuple()
-
-        def processEOF(self):
-            # Stop parsing
-            pass
+    def startTagHtml(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
 
-        def processCharacters(self, token):
-            self.parser.parseError("unexpected-char-after-frameset")
+    def startTagOther(self, token):
+        self.parser.parseError("unexpected-start-tag-after-body",
+                               {"name": token["name"]})
+        self.parser.phase = self.parser.phases["inBody"]
+        return token
 
-        def startTagNoframes(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
+    def endTagHtml(self, name):
+        if self.parser.innerHTML:
+            self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
+        else:
+            self.parser.phase = self.parser.phases["afterAfterBody"]
 
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-after-frameset",
-                                   {"name": token["name"]})
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag-after-body",
+                               {"name": token["name"]})
+        self.parser.phase = self.parser.phases["inBody"]
+        return token
 
-        def endTagHtml(self, token):
-            self.parser.phase = self.parser.phases["afterAfterFrameset"]
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", startTagHtml)
+    ])
+    startTagHandler.default = startTagOther
 
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-after-frameset",
-                                   {"name": token["name"]})
+    endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
+    endTagHandler.default = endTagOther
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("noframes", startTagNoframes)
-        ])
-        startTagHandler.default = startTagOther
 
-        endTagHandler = _utils.MethodDispatcher([
-            ("html", endTagHtml)
-        ])
-        endTagHandler.default = endTagOther
+class InFramesetPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
+    __slots__ = tuple()
 
-    class AfterAfterBodyPhase(Phase):
-        __slots__ = tuple()
+    def processEOF(self):
+        if self.tree.openElements[-1].name != "html":
+            self.parser.parseError("eof-in-frameset")
+        else:
+            assert self.parser.innerHTML
 
-        def processEOF(self):
-            pass
+    def processCharacters(self, token):
+        self.parser.parseError("unexpected-char-in-frameset")
 
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
+    def startTagFrameset(self, token):
+        self.tree.insertElement(token)
 
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inBody"].processSpaceCharacters(token)
+    def startTagFrame(self, token):
+        self.tree.insertElement(token)
+        self.tree.openElements.pop()
 
-        def processCharacters(self, token):
-            self.parser.parseError("expected-eof-but-got-char")
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
+    def startTagNoframes(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
 
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    def startTagOther(self, token):
+        self.parser.parseError("unexpected-start-tag-in-frameset",
+                               {"name": token["name"]})
 
-        def startTagOther(self, token):
-            self.parser.parseError("expected-eof-but-got-start-tag",
-                                   {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
+    def endTagFrameset(self, token):
+        if self.tree.openElements[-1].name == "html":
+            # innerHTML case
+            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
+        else:
+            self.tree.openElements.pop()
+        if (not self.parser.innerHTML and
+                self.tree.openElements[-1].name != "frameset"):
+            # If we're not in innerHTML mode and the current node is not a
+            # "frameset" element (anymore) then switch.
+            self.parser.phase = self.parser.phases["afterFrameset"]
 
-        def processEndTag(self, token):
-            self.parser.parseError("expected-eof-but-got-end-tag",
-                                   {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag-in-frameset",
+                               {"name": token["name"]})
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml)
-        ])
-        startTagHandler.default = startTagOther
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        ("frameset", startTagFrameset),
+        ("frame", startTagFrame),
+        ("noframes", startTagNoframes)
+    ])
+    startTagHandler.default = startTagOther
 
-    class AfterAfterFramesetPhase(Phase):
-        __slots__ = tuple()
+    endTagHandler = _utils.MethodDispatcher([
+        ("frameset", endTagFrameset)
+    ])
+    endTagHandler.default = endTagOther
 
-        def processEOF(self):
-            pass
 
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
+class AfterFramesetPhase(Phase):
+    # http://www.whatwg.org/specs/web-apps/current-work/#after3
+    __slots__ = tuple()
 
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inBody"].processSpaceCharacters(token)
+    def processEOF(self):
+        # Stop parsing
+        pass
 
-        def processCharacters(self, token):
-            self.parser.parseError("expected-eof-but-got-char")
+    def processCharacters(self, token):
+        self.parser.parseError("unexpected-char-after-frameset")
 
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
+    def startTagNoframes(self, token):
+        return self.parser.phases["inHead"].processStartTag(token)
 
-        def startTagNoFrames(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
+    def startTagOther(self, token):
+        self.parser.parseError("unexpected-start-tag-after-frameset",
+                               {"name": token["name"]})
 
-        def startTagOther(self, token):
-            self.parser.parseError("expected-eof-but-got-start-tag",
-                                   {"name": token["name"]})
+    def endTagHtml(self, token):
+        self.parser.phase = self.parser.phases["afterAfterFrameset"]
 
-        def processEndTag(self, token):
-            self.parser.parseError("expected-eof-but-got-end-tag",
-                                   {"name": token["name"]})
+    def endTagOther(self, token):
+        self.parser.parseError("unexpected-end-tag-after-frameset",
+                               {"name": token["name"]})
 
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("noframes", startTagNoFrames)
-        ])
-        startTagHandler.default = startTagOther
-
-    # pylint:enable=unused-argument
-
-    return {
-        "initial": InitialPhase,
-        "beforeHtml": BeforeHtmlPhase,
-        "beforeHead": BeforeHeadPhase,
-        "inHead": InHeadPhase,
-        "inHeadNoscript": InHeadNoscriptPhase,
-        "afterHead": AfterHeadPhase,
-        "inBody": InBodyPhase,
-        "text": TextPhase,
-        "inTable": InTablePhase,
-        "inTableText": InTableTextPhase,
-        "inCaption": InCaptionPhase,
-        "inColumnGroup": InColumnGroupPhase,
-        "inTableBody": InTableBodyPhase,
-        "inRow": InRowPhase,
-        "inCell": InCellPhase,
-        "inSelect": InSelectPhase,
-        "inSelectInTable": InSelectInTablePhase,
-        "inForeignContent": InForeignContentPhase,
-        "afterBody": AfterBodyPhase,
-        "inFrameset": InFramesetPhase,
-        "afterFrameset": AfterFramesetPhase,
-        "afterAfterBody": AfterAfterBodyPhase,
-        "afterAfterFrameset": AfterAfterFramesetPhase,
-        # XXX after after frameset
-    }
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", Phase.startTagHtml),
+        ("noframes", startTagNoframes)
+    ])
+    startTagHandler.default = startTagOther
+
+    endTagHandler = _utils.MethodDispatcher([
+        ("html", endTagHtml)
+    ])
+    endTagHandler.default = endTagOther
+
+
+class AfterAfterBodyPhase(Phase):
+    __slots__ = tuple()
+
+    def processEOF(self):
+        pass
+
+    def processComment(self, token):
+        self.tree.insertComment(token, self.tree.document)
+
+    def processSpaceCharacters(self, token):
+        return self.parser.phases["inBody"].processSpaceCharacters(token)
+
+    def processCharacters(self, token):
+        self.parser.parseError("expected-eof-but-got-char")
+        self.parser.phase = self.parser.phases["inBody"]
+        return token
+
+    def startTagHtml(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
+
+    def startTagOther(self, token):
+        self.parser.parseError("expected-eof-but-got-start-tag",
+                               {"name": token["name"]})
+        self.parser.phase = self.parser.phases["inBody"]
+        return token
+
+    def processEndTag(self, token):
+        self.parser.parseError("expected-eof-but-got-end-tag",
+                               {"name": token["name"]})
+        self.parser.phase = self.parser.phases["inBody"]
+        return token
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", startTagHtml)
+    ])
+    startTagHandler.default = startTagOther
+
+
+class AfterAfterFramesetPhase(Phase):
+    __slots__ = tuple()
+
+    def processEOF(self):
+        pass
+
+    def processComment(self, token):
+        self.tree.insertComment(token, self.tree.document)
+
+    def processSpaceCharacters(self, token):
+        return self.parser.phases["inBody"].processSpaceCharacters(token)
+
+    def processCharacters(self, token):
+        self.parser.parseError("expected-eof-but-got-char")
+
+    def startTagHtml(self, token):
+        return self.parser.phases["inBody"].processStartTag(token)
+
+    def startTagNoFrames(self, token):
+        return self.parser.phases["inHead"].processStartTag(token)
+
+    def startTagOther(self, token):
+        self.parser.parseError("expected-eof-but-got-start-tag",
+                               {"name": token["name"]})
+
+    def processEndTag(self, token):
+        self.parser.parseError("expected-eof-but-got-end-tag",
+                               {"name": token["name"]})
+
+    startTagHandler = _utils.MethodDispatcher([
+        ("html", startTagHtml),
+        ("noframes", startTagNoFrames)
+    ])
+    startTagHandler.default = startTagOther
+
+# pylint:enable=unused-argument
+
+
+_phases = {
+    "initial": InitialPhase,
+    "beforeHtml": BeforeHtmlPhase,
+    "beforeHead": BeforeHeadPhase,
+    "inHead": InHeadPhase,
+    "inHeadNoscript": InHeadNoscriptPhase,
+    "afterHead": AfterHeadPhase,
+    "inBody": InBodyPhase,
+    "text": TextPhase,
+    "inTable": InTablePhase,
+    "inTableText": InTableTextPhase,
+    "inCaption": InCaptionPhase,
+    "inColumnGroup": InColumnGroupPhase,
+    "inTableBody": InTableBodyPhase,
+    "inRow": InRowPhase,
+    "inCell": InCellPhase,
+    "inSelect": InSelectPhase,
+    "inSelectInTable": InSelectInTablePhase,
+    "inForeignContent": InForeignContentPhase,
+    "afterBody": AfterBodyPhase,
+    "inFrameset": InFramesetPhase,
+    "afterFrameset": AfterFramesetPhase,
+    "afterAfterBody": AfterAfterBodyPhase,
+    "afterAfterFrameset": AfterAfterFramesetPhase,
+    # XXX after after frameset
+}
 
 
 def adjust_attributes(token, replacements):
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 879d2447..6b464bea 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -68,7 +68,6 @@ def test_debug_log():
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}),
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
-                ('dataState', 'InBodyPhase', 'InHeadPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}),
                 ('scriptDataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}),
                 ('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'script', 'type': 'EndTag'}),
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}),