MegacoderKim
diff --git a/html5lib/constants.py b/html5lib/constants.py
Lines changed: 2 additions & 1 deletion
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
Lines changed: 81 additions & 90 deletions
@@ -483,7 +483,8 @@
     "area",
     "col",
     "input",
-    "source"
+    "source",
+    "track"
 ))
 
 cdataElements = frozenset(('title', 'textarea'))
 
@@ -150,6 +150,19 @@ def reset(self):
         self.beforeRCDataPhase = None
 
         self.framesetOK = True
+
+    def isHTMLIntegrationPoint(self, element):
+        if (element.name == "annotation-xml" and 
+            element.namespace == namespaces["mathml"]):
+            return ("encoding" in element.attributes and
+                    element.attributes["encoding"].translate(
+                        asciiUpper2Lower) in 
+                    ("text/html", "application/xhtml+xml"))
+        else:
+            return (element.namespace, element.name) in htmlIntegrationPointElements
+
+    def isMathMLTextIntegrationPoint(self, element):
+        return (element.namespace, element.name) in mathmlTextIntegrationPointElements
 
     def mainLoop(self):
         CharactersToken = tokenTypes["Characters"]
@@ -158,27 +171,48 @@ def mainLoop(self):
         EndTagToken = tokenTypes["EndTag"]
         CommentToken = tokenTypes["Comment"]
         DoctypeToken = tokenTypes["Doctype"]
-        
+        ParseErrorToken = tokenTypes["ParseError"]
 
         for token in self.normalizedTokens():
             new_token = token
             while new_token is not None:
+                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
+                currentNodeNamespace = currentNode.namespace if currentNode else None
+                currentNodeName = currentNode.name if currentNode else None
+
                 type = new_token["type"]
-                if type == CharactersToken:
-                    new_token = self.phase.processCharacters(new_token)
-                elif type == SpaceCharactersToken:
-                     new_token= self.phase.processSpaceCharacters(new_token)
-                elif type == StartTagToken:
-                    new_token = self.phase.processStartTag(new_token)
-                elif type == EndTagToken:
-                    new_token = self.phase.processEndTag(new_token)
-                elif type == CommentToken:
-                    new_token = self.phase.processComment(new_token)
-                elif type == DoctypeToken:
-                    new_token = self.phase.processDoctype(new_token)
-                else:
+                
+                if type == ParseErrorToken:
                     self.parseError(new_token["data"], new_token.get("datavars", {}))
                     new_token = None
+                else:
+                    if (len(self.tree.openElements) == 0 or
+                        currentNodeNamespace == self.tree.defaultNamespace or
+                        (self.isMathMLTextIntegrationPoint(currentNode) and
+                         ((type == StartTagToken and
+                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
+                         type in (CharactersToken, SpaceCharactersToken))) or
+                        (currentNodeNamespace == namespaces["mathml"] and
+                         currentNodeName == "annotation-xml" and  # <svg> start tag only
+                         type == StartTagToken and token["name"] == "svg") or
+                        (self.isHTMLIntegrationPoint(currentNode) and
+                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
+                        phase = self.phase
+                    else:
+                        phase = self.phases["inForeignContent"]
+
+                    if type == CharactersToken:
+                        new_token = phase.processCharacters(new_token)
+                    elif type == SpaceCharactersToken:
+                         new_token= phase.processSpaceCharacters(new_token)
+                    elif type == StartTagToken:
+                        new_token = phase.processStartTag(new_token)
+                    elif type == EndTagToken:
+                        new_token = phase.processEndTag(new_token)
+                    elif type == CommentToken:
+                        new_token = phase.processComment(new_token)
+                    elif type == DoctypeToken:
+                        new_token = phase.processDoctype(new_token)
 
             if (type == StartTagToken and token["selfClosing"]
                 and not token["selfClosingAcknowledged"]):
@@ -379,12 +413,12 @@ def resetInsertionMode(self):
             if nodeName in ("select", "colgroup", "head", "html"):
                 assert self.innerHTML
 
+            if not last and node.namespace != self.tree.defaultNamespace:
+                continue
+
             if nodeName in newModes:
                 new_phase = self.phases[newModes[nodeName]]
                 break
-            elif node.namespace in (namespaces["mathml"], namespaces["svg"]):
-                new_phase = self.phases["inForeignContent"]
-                break
             elif last:
                 new_phase = self.phases["inBody"]
                 break
@@ -419,7 +453,6 @@ def wrapped(self, *args, **kwargs):
                 try:
                     info = {"type":type_names[token['type']]}
                 except:
-                    print token
                     raise
                 if token['type'] in constants.tagTokenTypes:
                     info["name"] = token['name']
@@ -1243,7 +1276,6 @@ def startTagMath(self, token):
             self.tree.insertElement(token)
             #Need to get the parse error right for the case where the token 
             #has a namespace not equal to the xmlns attribute
-            self.parser.phase = self.parser.phases["inForeignContent"]
             if token["selfClosing"]:
                 self.tree.openElements.pop()
                 token["selfClosingAcknowledged"] = True
@@ -1256,7 +1288,6 @@ def startTagSvg(self, token):
             self.tree.insertElement(token)
             #Need to get the parse error right for the case where the token 
             #has a namespace not equal to the xmlns attribute
-            self.parser.phase = self.parser.phases["inForeignContent"]
             if token["selfClosing"]:
                 self.tree.openElements.pop()
                 token["selfClosingAcknowledged"] = True
@@ -1741,7 +1772,7 @@ def processSpaceCharacters(self, token):
             self.characterTokens.append(token)
     #        assert False
 
-        def processStartTag(self, token):        
+        def processStartTag(self, token):
             self.flushCharacters()
             self.parser.phase = self.originalPhase
             return token
@@ -2298,7 +2329,7 @@ def endTagOther(self, token):
     class InForeignContentPhase(Phase):
         breakoutElements = frozenset(["b", "big", "blockquote", "body", "br", 
                                       "center", "code", "dd", "div", "dl", "dt",
-                                      "em", "embed", "font", "h1", "h2", "h3", 
+                                      "em", "embed", "h1", "h2", "h3", 
                                       "h4", "h5", "h6", "head", "hr", "i", "img",
                                       "li", "listing", "menu", "meta", "nobr", 
                                       "ol", "p", "pre", "ruby", "s",  "small", 
@@ -2307,19 +2338,6 @@ class InForeignContentPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-        def isHTMLIntegrationPoint(self, element):
-            if (element.name == "annotation-xml" and 
-                element.namespace == namespaces["mathml"]):
-                return ("encoding" in element.attributes and
-                        element.attributes["encoding"].translate(
-                        asciiUpper2Lower) in 
-                        ("text/html", "application/xhtml+xml"))
-            else:
-                return (element.namespace, element.name) in htmlIntegrationPointElements
-
-        def isMathMLTextIntegrationPoint(self, element):
-            return (element.namespace, element.name) in mathmlTextIntegrationPointElements
-
         def adjustSVGTagNames(self, token):
             replacements = {u"altglyph":u"altGlyph",
                             u"altglyphdef":u"altGlyphDef",
@@ -2362,48 +2380,25 @@ def adjustSVGTagNames(self, token):
                 token["name"] = replacements[token["name"]]
 
         def processCharacters(self, token):
-            if (self.tree.openElements[-1].namespace == self.tree.defaultNamespace or
-                self.isHTMLIntegrationPoint(self.tree.openElements[-1])):
-                new_token = self.parser.phases["inBody"].processCharacters(token)
-                self.parser.resetInsertionMode()
-                return new_token
-            elif token["data"] == u"\u0000":
+            if token["data"] == u"\u0000":
                 token["data"] = u"\uFFFD"
             elif (self.parser.framesetOK and 
                   any(char not in spaceCharacters for char in token["data"])):
                 self.parser.framesetOK = False
             Phase.processCharacters(self, token)
 
-        def processEOF(self):
-            reprocess = self.parser.phases["inBody"].processEOF()
-            self.parser.resetInsertionMode()
-            return reprocess
-
         def processStartTag(self, token):
             currentNode = self.tree.openElements[-1]
-            currentNodeNamespace = currentNode.namespace
-            currentNodeName = currentNode.name
-            if (currentNodeNamespace == self.tree.defaultNamespace or
-                (self.isMathMLTextIntegrationPoint(currentNode) and 
-                 token["name"] not in frozenset(["mglyph", "malignmark"])) or
-                (currentNodeNamespace == namespaces["mathml"] and
-                 currentNodeName == "annotation-xml" and
-                 token["name"] == "svg") or
-                self.isHTMLIntegrationPoint(currentNode)):
-                
-                new_token = self.parser.phases["inBody"].processStartTag(token)
-                self.parser.resetInsertionMode()
-                return new_token
-                
-            elif token["name"] in self.breakoutElements:
+            if (token["name"] in self.breakoutElements or
+                (token["name"] == "font" and  # only with one of these attributes
+                 set(token["data"].keys()) & set(["color", "face", "size"]))):
                 self.parser.parseError("unexpected-html-element-in-foreign-content",
                                        token["name"])
                 while (self.tree.openElements[-1].namespace !=
                        self.tree.defaultNamespace and 
-                       not self.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
-                       not self.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
+                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
+                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                     self.tree.openElements.pop()
-                self.parser.resetInsertionMode()
                 return token
 
             else:
@@ -2420,33 +2415,29 @@ def processStartTag(self, token):
                     token["selfClosingAcknowledged"] = True
 
         def processEndTag(self, token):
-            if self.tree.openElements[-1].namespace == self.tree.defaultNamespace:
-                new_token = self.parser.phases["inBody"].processEndTag(token)
-                self.parser.resetInsertionMode()
-                return new_token 
-            else:
-                nodeIndex = len(self.tree.openElements) - 1
-                node = self.tree.openElements[-1]
-                if node.name != token["name"]:
-                    self.parser.parseError("unexpected-end-tag", token["name"])
-
-                while True:
-                    if node.name.translate(asciiUpper2Lower) == token["name"]:
-                        while self.tree.openElements.pop() != node:
-                            assert self.tree.openElements
-                        new_token = None
-                        break
-                    nodeIndex -= 1
+            nodeIndex = len(self.tree.openElements) - 1
+            node = self.tree.openElements[-1]
+            if node.name.translate(asciiUpper2Lower) != token["name"]:  # ignore case, like the loop below
+                self.parser.parseError("unexpected-end-tag", token["name"])
+
+            while True:
+                if node.name.translate(asciiUpper2Lower) == token["name"]:
+                    if self.parser.phase == self.parser.phases["inTableText"]:
+                        self.parser.phase.flushCharacters()
+                        self.parser.phase = self.parser.phase.originalPhase
+                    while self.tree.openElements.pop() != node:
+                        assert self.tree.openElements
+                    new_token = None
+                    break
+                nodeIndex -= 1
 
-                    node = self.tree.openElements[nodeIndex]
-                    if node.namespace != self.tree.defaultNamespace:
-                        continue
-                    else:
-                        new_token = self.parser.phases["inBody"].processEndTag(token)
-                        break
-                if self.parser.phase == self:
-                    self.parser.resetInsertionMode()
-                return new_token
+                node = self.tree.openElements[nodeIndex]
+                if node.namespace != self.tree.defaultNamespace:
+                    continue
+                else:
+                    new_token = self.parser.phase.processEndTag(token)
+                    break
+            return new_token
 
 
     class AfterBodyPhase(Phase):