Skip to content

Commit 000eae1

Browse files
committed
Added noscript parsing for head elements.
1 parent 3b3c103 commit 000eae1

File tree

1 file changed

+70
-3
lines changed

1 file changed

+70
-3
lines changed

html5lib/html5parser.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,8 @@ def __init__(self, parser, tree):
708708
self.startTagHandler = utils.MethodDispatcher([
709709
("html", self.startTagHtml),
710710
("title", self.startTagTitle),
711-
(("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
711+
("noscript", self.startTagNoScript),
712+
(("noframes", "style"), self.startTagNoScriptNoFramesStyle),
712713
("script", self.startTagScript),
713714
(("base", "basefont", "bgsound", "command", "link"),
714715
self.startTagBaseLinkCommand),
@@ -777,6 +778,11 @@ def startTagScript(self, token):
777778
self.parser.originalPhase = self.parser.phase
778779
self.parser.phase = self.parser.phases["text"]
779780

781+
def startTagNoScript(self, token):
    """Insert the <noscript> element and switch to the "inHeadNoscript" phase.

    The phase that was active when the tag was seen is stored in
    ``parser.originalPhase`` before the switch.
    """
    parser = self.parser
    self.tree.insertElement(token)
    # Record the current phase first, then hand control to the dedicated
    # noscript-in-head phase.
    parser.originalPhase = parser.phase
    parser.phase = parser.phases["inHeadNoscript"]
785+
780786
def startTagOther(self, token):
781787
self.anythingElse()
782788
return token
@@ -799,7 +805,68 @@ def anythingElse(self):
799805
# The phase below handles the contents of a <noscript> element found in
800806
# <head>, which is needed by a parser for which scripting is disabled.
801807
#
802-
# class InHeadNoScriptPhase(Phase):
808+
class InHeadNoScriptPhase(Phase):
    """Phase for tokens seen inside a <noscript> element in <head>.

    <meta>, <style> and <link> start tags are handled directly. Any other
    start tag is a parse error: the open <noscript> is implicitly closed and
    the token is returned to the caller (presumably so it is reprocessed in
    the new phase -- confirm against the main loop). A </noscript> end tag
    pops the element and returns to the "inHead" phase; every other end tag
    is reported as a parse error and ignored.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("meta", self.startTagMeta),
            ("style", self.startTagStyle),
            ("link", self.startTagLink),
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("noscript", self.endTagNoScript),
        ])
        self.endTagHandler.default = self.endTagOther

    def startTagMeta(self, token):
        """Insert a <meta> element and apply any encoding it declares."""
        self.tree.insertElement(token)
        # <meta> has no content: take it straight back off the open-element
        # stack and mark the self-closing flag as acknowledged.
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        # Only a tentative encoding may be overridden by the document.
        if stream.charEncoding[1] == "tentative":
            if "charset" in attributes:
                stream.changeEncoding(attributes["charset"])
            elif ("content" in attributes and
                  "http-equiv" in attributes and
                  attributes["http-equiv"].lower() == "content-type"):
                # Encoding it as UTF-8 here is a hack, as really we should pass
                # the abstract Unicode string, and just use the
                # ContentAttrParser on that, but using UTF-8 allows all chars
                # to be encoded and as a ASCII-superset works.
                data = inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
                contentParser = inputstream.ContentAttrParser(data)
                codec = contentParser.parse()
                stream.changeEncoding(codec)

    def startTagStyle(self, token):
        """Parse a <style> element's content as RAWTEXT."""
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagLink(self, token):
        """Insert a <link> element and immediately close it."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        """Report a parse error, close <noscript>, and hand the token back.

        The HTML specification treats any other start tag in this insertion
        mode as a parse error. The leftover ``pdb.set_trace()`` breakpoint
        that used to sit here has been removed: it stopped the parser on
        every unexpected start tag.
        """
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagNoScript(self, token):
        """Pop the <noscript> element and return to the "inHead" phase."""
        node = self.tree.openElements.pop()
        assert node.name == "noscript", "Expected noscript got %s" % node.name
        # Switch explicitly to "inHead" instead of restoring
        # parser.originalPhase. That slot is also written when entering the
        # "text" phase (see startTagScript), and presumably by
        # parseRCDataRawtext for a nested <style>, so by the time </noscript>
        # arrives it may point back at this very phase.
        self.parser.phase = self.parser.phases["inHead"]

    def endTagOther(self, token):
        """Report any end tag other than </noscript> as a parse error."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Act as if a </noscript> end tag had been seen."""
        self.endTagNoScript(impliedTagToken("noscript"))
868+
869+
803870
class AfterHeadPhase(Phase):
804871
def __init__(self, parser, tree):
805872
Phase.__init__(self, parser, tree)
@@ -2687,7 +2754,7 @@ def processEndTag(self, token):
26872754
"beforeHtml": BeforeHtmlPhase,
26882755
"beforeHead": BeforeHeadPhase,
26892756
"inHead": InHeadPhase,
2690-
# XXX "inHeadNoscript": InHeadNoScriptPhase,
2757+
"inHeadNoscript": InHeadNoScriptPhase,
26912758
"afterHead": AfterHeadPhase,
26922759
"inBody": InBodyPhase,
26932760
"text": TextPhase,

0 commit comments

Comments
 (0)