@@ -298,6 +298,27 @@ def startTagHtml(self, name, attributes):
298
298
def processEndTag(self, name):
    """Look up ``name`` in ``self.endTagHandler`` and call the result with it."""
    handler = self.endTagHandler[name]
    handler(name)
300
300
301
def parseRCDataCData(self, name, attributes, contentType):
    """Generic (R)CDATA parsing algorithm.

    Inserts an element for ``name``/``attributes``, switches the tokenizer's
    content model flag to ``contentType`` and then consumes tokens directly
    from the tokenizer: character tokens are inserted as text, parse errors
    are forwarded to the parser, and the first other token must be the
    matching end tag, at which point the element is popped from the stack
    of open elements. If the tokenizer is exhausted first, the element is
    popped and an EOF parse error is reported.

    contentType - "RCDATA" or "CDATA"
    """
    assert contentType in ("CDATA", "RCDATA")
    element = self.tree.insertElement(name, attributes)
    self.parser.tokenizer.contentModelFlag = contentModelFlags[contentType]
    for token in self.parser.tokenizer:
        tokenType = token["type"]
        if tokenType in ("Characters", "SpaceCharacters"):
            self.tree.insertText(token["data"])
        elif tokenType == "ParseError":
            self.parser.parseError(token["data"], token.get("datavars", {}))
        else:
            assert self.parser.tokenizer.contentModelFlag == contentModelFlags["PCDATA"]
            assert tokenType == "EndTag" and token["name"] == name, repr(token)
            # Pop outside the assert: assert statements are removed under
            # ``python -O``, which would otherwise leave the element on the
            # stack of open elements.
            popped = self.tree.openElements.pop()
            assert popped == element
            return
    # Otherwise we hit EOF
    popped = self.tree.openElements.pop()
    assert popped == element
    self.parser.parseError("expected-closing-tag-but-got-eof")
321
+
301
322
302
323
class InitialPhase (Phase ):
303
324
# This phase deals with error handling as well which is currently not
@@ -549,10 +570,6 @@ def appendToHead(self, element):
549
570
550
571
# the real thing
551
572
def processEOF(self):
    """Run ``anythingElse`` and then hand EOF to the parser's current phase."""
    self.anythingElse()
    # Read the phase only after anythingElse() has run, so that any phase
    # change it makes is the one that receives the EOF.
    currentPhase = self.parser.phase
    currentPhase.processEOF()
558
575
@@ -568,44 +585,18 @@ def startTagHead(self, name, attributes):
568
585
self .parser .parseError ("two-heads-are-not-better-than-one" )
569
586
570
587
def startTagTitle(self, name, attributes):
    """Handle the start tag by delegating to parseRCDataCData as RCDATA."""
    contentType = "RCDATA"
    self.parseRCDataCData(name, attributes, contentType)
578
589
579
590
def startTagStyle(self, name, attributes):
    """Handle the start tag by delegating to parseRCDataCData as CDATA."""
    contentType = "CDATA"
    self.parseRCDataCData(name, attributes, contentType)
587
592
588
593
def startTagNoScript(self, name, attributes):
    """Handle the start tag by delegating to parseRCDataCData as CDATA."""
    # Need to decide whether to implement the scripting-disabled case
    contentType = "CDATA"
    self.parseRCDataCData(name, attributes, contentType)
597
596
598
597
def startTagScript(self, name, attributes):
    """Handle the start tag by delegating to parseRCDataCData as CDATA."""
    # I think this is equivalent to the CDATA stuff since we don't execute
    # script
    contentType = "CDATA"
    self.parseRCDataCData(name, attributes, contentType)
609
600
610
601
def startTagBaseLinkMeta (self , name , attributes ):
611
602
if (self .tree .headPointer is not None and self .parser .phase == self .parser .phases ["inHead" ]):
@@ -620,10 +611,8 @@ def startTagOther(self, name, attributes):
620
611
self .parser .phase .processStartTag (name , attributes )
621
612
622
613
def endTagHead(self, name):
    """Pop the current node, which must be "head", and switch to "afterHead".

    The ``name`` argument is not used by the body.
    """
    openElements = self.tree.openElements
    assert openElements[-1].name == "head"
    openElements.pop()
    parser = self.parser
    parser.phase = parser.phases["afterHead"]
628
617
629
618
def endTagImplyAfterHead (self , name ):
@@ -640,10 +629,8 @@ def endTagOther(self, name):
640
629
self .parser .parseError ("unexpected-end-tag" , {"name" : name })
641
630
642
631
def anythingElse(self):
    """Fallback action: behave as if an end tag named "head" had been seen."""
    impliedEndTag = "head"
    self.endTagHead(impliedEndTag)
633
+
647
634
648
635
# XXX If we implement a parser for which scripting is disabled we need to
649
636
# implement this phase.
@@ -682,8 +669,10 @@ def startTagFrameset(self, name, attributes):
682
669
def startTagFromHead(self, name, attributes):
    """Report a parse error, then process the tag with the "inHead" phase.

    The head pointer is pushed onto the stack of open elements for the
    duration of the "inHead" call and popped again afterwards; the parser's
    current phase is not changed here.
    """
    errorVars = {"name": name}
    self.parser.parseError("unexpected-start-tag-out-of-my-head", errorVars)
    tree = self.tree
    tree.openElements.append(tree.headPointer)
    inHead = self.parser.phases["inHead"]
    inHead.processStartTag(name, attributes)
    node = tree.openElements.pop()
    assert node is tree.headPointer, "Node name is %s, expected head" % node.name
687
676
688
677
def startTagOther (self , name , attributes ):
689
678
self .anythingElse ()
@@ -932,8 +921,7 @@ def startTagAppletMarqueeObject(self, name, attributes):
932
921
933
922
def startTagXmp(self, name, attributes):
    """Reconstruct the active formatting elements, then parse as CDATA."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    contentType = "CDATA"
    self.parseRCDataCData(name, attributes, contentType)
937
925
938
926
def startTagTable (self , name , attributes ):
939
927
if self .tree .elementInScope ("p" ):
@@ -993,8 +981,7 @@ def startTagTextarea(self, name, attributes):
993
981
994
982
def startTagCdata(self, name, attributes):
    """iframe, noembed, noframes, noscript (if scripting enabled).

    Delegates to parseRCDataCData with the "CDATA" content type.
    """
    contentType = "CDATA"
    self.parseRCDataCData(name, attributes, contentType)
998
985
999
986
def startTagSelect (self , name , attributes ):
1000
987
self .tree .reconstructActiveFormattingElements ()
0 commit comments