@@ -58,7 +58,7 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder,
58
58
59
59
self .phases = {
60
60
"initial" : InitialPhase (self , self .tree ),
61
- "rootElement " : RootElementPhase (self , self .tree ),
61
+ "beforeHtml " : BeforeHtmlPhase (self , self .tree ),
62
62
"beforeHead" : BeforeHeadPhase (self , self .tree ),
63
63
"inHead" : InHeadPhase (self , self .tree ),
64
64
# XXX "inHeadNoscript": InHeadNoScriptPhase(self, self.tree),
@@ -71,10 +71,14 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder,
71
71
"inRow" : InRowPhase (self , self .tree ),
72
72
"inCell" : InCellPhase (self , self .tree ),
73
73
"inSelect" : InSelectPhase (self , self .tree ),
74
+ # XXX inSelectInTable
74
75
"afterBody" : AfterBodyPhase (self , self .tree ),
75
76
"inFrameset" : InFramesetPhase (self , self .tree ),
76
77
"afterFrameset" : AfterFramesetPhase (self , self .tree ),
77
78
"trailingEnd" : TrailingEndPhase (self , self .tree )
79
+ # XXX after after body
80
+ # XXX after after frameset
81
+ # XXX trailingEnd is gone
78
82
}
79
83
80
84
def _parse (self , stream , innerHTML = False , container = "div" ,
@@ -101,7 +105,7 @@ def _parse(self, stream, innerHTML=False, container="div",
101
105
# contentModelFlag already is PCDATA
102
106
#self.tokenizer.contentModelFlag = tokenizer.contentModelFlags["PCDATA"]
103
107
pass
104
- self .phase = self .phases ["rootElement " ]
108
+ self .phase = self .phases ["beforeHtml " ]
105
109
self .phase .insertHtmlElement ()
106
110
self .resetInsertionMode ()
107
111
else :
@@ -300,7 +304,7 @@ class InitialPhase(Phase):
300
304
# this.
301
305
def processEOF(self):
    """Handle end-of-file arriving while a doctype is still expected.

    Reports the "expected-doctype-but-got-eof" parse error, switches the
    parser to the phase registered under the "beforeHtml " key, and lets
    that phase handle the EOF.
    """
    parser = self.parser
    parser.parseError("expected-doctype-but-got-eof")
    # NOTE(review): the key is reproduced exactly as it appears in the
    # phases table in this view, trailing space included -- confirm.
    followingPhase = parser.phases["beforeHtml "]
    parser.phase = followingPhase
    # Re-dispatch the same event to whichever phase is now current.
    parser.phase.processEOF()
305
309
306
310
def processComment (self , data ):
@@ -401,30 +405,30 @@ def processDoctype(self, name, publicId, systemId, correct):
401
405
#XXX quirks mode
402
406
pass
403
407
404
- self .parser .phase = self .parser .phases ["rootElement " ]
408
+ self .parser .phase = self .parser .phases ["beforeHtml " ]
405
409
406
410
def processSpaceCharacters(self, data):
    """Ignore whitespace-only character data.

    Nothing is inserted, no parse error is reported and the parser phase
    is left untouched.
    """
    return None
408
412
409
413
def processCharacters(self, data):
    """Handle character data arriving while a doctype is still expected.

    Reports "expected-doctype-but-got-chars", switches the parser to the
    phase registered under the "beforeHtml " key, and forwards ``data``
    to that phase.
    """
    parser = self.parser
    parser.parseError("expected-doctype-but-got-chars")
    # NOTE(review): the key is reproduced exactly as it appears in the
    # phases table in this view, trailing space included -- confirm.
    followingPhase = parser.phases["beforeHtml "]
    parser.phase = followingPhase
    # Re-dispatch the same characters to whichever phase is now current.
    parser.phase.processCharacters(data)
413
417
414
418
def processStartTag(self, name, attributes):
    """Handle a start tag arriving while a doctype is still expected.

    Reports "expected-doctype-but-got-start-tag" (with the tag name as
    error data), switches the parser to the phase registered under the
    "beforeHtml " key, and forwards the start tag to that phase.
    """
    parser = self.parser
    errorData = {"name": name}
    parser.parseError("expected-doctype-but-got-start-tag", errorData)
    # NOTE(review): the key is reproduced exactly as it appears in the
    # phases table in this view, trailing space included -- confirm.
    followingPhase = parser.phases["beforeHtml "]
    parser.phase = followingPhase
    # Re-dispatch the same token to whichever phase is now current.
    parser.phase.processStartTag(name, attributes)
419
423
420
424
def processEndTag(self, name):
    """Handle an end tag arriving while a doctype is still expected.

    Reports "expected-doctype-but-got-end-tag" (with the tag name as
    error data), switches the parser to the phase registered under the
    "beforeHtml " key, and forwards the end tag to that phase.
    """
    parser = self.parser
    errorData = {"name": name}
    parser.parseError("expected-doctype-but-got-end-tag", errorData)
    # NOTE(review): the key is reproduced exactly as it appears in the
    # phases table in this view, trailing space included -- confirm.
    followingPhase = parser.phases["beforeHtml "]
    parser.phase = followingPhase
    # Re-dispatch the same token to whichever phase is now current.
    parser.phase.processEndTag(name)
425
429
426
430
427
- class RootElementPhase (Phase ):
431
+ class BeforeHtmlPhase (Phase ):
428
432
# helper methods
429
433
def insertHtmlElement (self ):
430
434
self .tree .insertRoot ("html" )
@@ -475,6 +479,9 @@ def processEOF(self):
475
479
self .startTagHead ("head" , {})
476
480
self .parser .phase .processEOF ()
477
481
482
def processSpaceCharacters(self, data):
    """Discard whitespace-only character data without touching any state."""
    return None
484
+
478
485
def processCharacters (self , data ):
479
486
self .startTagHead ("head" , {})
480
487
self .parser .phase .processCharacters (data )
@@ -548,33 +555,36 @@ def startTagHead(self, name, attributes):
548
555
self .parser .parseError ("two-heads-are-not-better-than-one" )
549
556
550
557
def startTagTitle(self, name, attributes):
    """Insert a title element and switch the tokenizer to RCDATA.

    When a head pointer exists and the parser's current phase is the
    "inHead" phase, the element is created, handed to ``appendToHead``
    and pushed on the stack of open elements. Otherwise it is inserted
    through ``tree.insertElement``.
    """
    tree = self.tree
    parser = self.parser
    # Short-circuits exactly like the original test: the phase comparison
    # only happens when a head pointer is set.
    handledInHead = (tree.headPointer is not None and
                     parser.phase == parser.phases["inHead"])
    if not handledInHead:
        tree.insertElement(name, attributes)
    else:
        titleElement = tree.createElement(name, attributes)
        self.appendToHead(titleElement)
        tree.openElements.append(titleElement)
    parser.tokenizer.contentModelFlag = contentModelFlags["RCDATA"]
555
566
556
567
def startTagStyle(self, name, attributes):
    """Insert a style element and switch the tokenizer to CDATA.

    When a head pointer exists and the parser's current phase is the
    "inHead" phase, the element is created, handed to ``appendToHead``
    and pushed on the stack of open elements. Otherwise it is inserted
    through ``tree.insertElement``.
    """
    tree = self.tree
    parser = self.parser
    # Short-circuits exactly like the original test: the phase comparison
    # only happens when a head pointer is set.
    handledInHead = (tree.headPointer is not None and
                     parser.phase == parser.phases["inHead"])
    if not handledInHead:
        tree.insertElement(name, attributes)
    else:
        styleElement = tree.createElement(name, attributes)
        self.appendToHead(styleElement)
        tree.openElements.append(styleElement)
    parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
565
576
566
577
def startTagNoScript(self, name, attributes):
    """Insert a noscript element and switch the tokenizer to CDATA.

    When a head pointer exists and the parser's current phase is the
    "inHead" phase, the element is created, handed to ``appendToHead``
    and pushed on the stack of open elements. Otherwise it is inserted
    through ``tree.insertElement``.
    """
    # XXX Need to decide whether to implement the scripting disabled case.
    tree = self.tree
    parser = self.parser
    # Short-circuits exactly like the original test: the phase comparison
    # only happens when a head pointer is set.
    handledInHead = (tree.headPointer is not None and
                     parser.phase == parser.phases["inHead"])
    if not handledInHead:
        tree.insertElement(name, attributes)
    else:
        noscriptElement = tree.createElement(name, attributes)
        self.appendToHead(noscriptElement)
        tree.openElements.append(noscriptElement)
    parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
576
-
577
-
587
+
578
588
def startTagScript (self , name , attributes ):
579
589
#XXX Inner HTML case may be wrong
580
590
element = self .tree .createElement (name , attributes )
@@ -689,9 +699,8 @@ def __init__(self, parser, tree):
689
699
690
700
self .startTagHandler = utils .MethodDispatcher ([
691
701
("html" , self .startTagHtml ),
692
- (("base" , "link" , "meta" , "script" , "style" ),
702
+ (("base" , "link" , "meta" , "script" , "style" , "title" ),
693
703
self .startTagProcessInHead ),
694
- ("title" , self .startTagTitle ),
695
704
("body" , self .startTagBody ),
696
705
(("address" , "blockquote" , "center" , "dir" , "div" , "dl" ,
697
706
"fieldset" , "listing" , "menu" , "ol" , "p" , "pre" , "ul" ),
@@ -705,7 +714,7 @@ def __init__(self, parser, tree):
705
714
"tt" , "u" ),self .startTagFormatting ),
706
715
("nobr" , self .startTagNobr ),
707
716
("button" , self .startTagButton ),
708
- (("marquee" , "object" ), self .startTagMarqueeObject ),
717
+ (("applet" , " marquee" , "object" ), self .startTagAppletMarqueeObject ),
709
718
("xmp" , self .startTagXmp ),
710
719
("table" , self .startTagTable ),
711
720
(("area" , "basefont" , "bgsound" , "br" , "embed" , "img" , "param" ,
@@ -736,7 +745,7 @@ def __init__(self, parser, tree):
736
745
(headingElements , self .endTagHeading ),
737
746
(("a" , "b" , "big" , "em" , "font" , "i" , "nobr" , "s" , "small" ,
738
747
"strike" , "strong" , "tt" , "u" ), self .endTagFormatting ),
739
- (("marquee" , "object" , "button" ), self .endTagButtonMarqueeObject ),
748
+ (("applet" , " marquee" , "object" , "button" ), self .endTagAppletButtonMarqueeObject ),
740
749
(("head" , "frameset" , "select" , "optgroup" , "option" , "table" ,
741
750
"caption" , "colgroup" , "col" , "thead" , "tfoot" , "tbody" , "tr" ,
742
751
"td" , "th" ), self .endTagMisplaced ),
@@ -759,11 +768,11 @@ def addFormattingElement(self, name, attributes):
759
768
760
769
# the real deal
761
770
def processSpaceCharactersDropNewline (self , data ):
762
- # Sometimes (start of <pre> and <textarea> blocks) we want to drop
763
- # leading newlines
771
+ # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
772
+ # want to drop leading newlines
764
773
self .processSpaceCharacters = self .processSpaceCharactersNonPre
765
774
if (data .startswith ("\n " ) and
766
- self .tree .openElements [- 1 ].name in ("pre" , "textarea" ) and
775
+ self .tree .openElements [- 1 ].name in ("pre" , "listing" , " textarea" ) and
767
776
not self .tree .openElements [- 1 ].hasContent ()):
768
777
data = data [1 :]
769
778
if data :
@@ -785,11 +794,6 @@ def processSpaceCharacters(self, data):
785
794
def startTagProcessInHead(self, name, attributes):
    """Delegate a start tag to the parser's "inHead" phase.

    The parser's current phase is not changed here; the token is simply
    handed to the phase object registered under "inHead".
    """
    inHeadPhase = self.parser.phases["inHead"]
    inHeadPhase.processStartTag(name, attributes)
787
796
788
- def startTagTitle (self , name , attributes ):
789
- self .parser .parseError ("unexpected-start-tag-out-of-my-head" ,
790
- {"name" : name })
791
- self .parser .phases ["inHead" ].processStartTag (name , attributes )
792
-
793
797
def startTagBody (self , name , attributes ):
794
798
self .parser .parseError ("unexpected-start-tag" , {"name" : "body" })
795
799
if (len (self .tree .openElements ) == 1
@@ -804,7 +808,7 @@ def startTagCloseP(self, name, attributes):
804
808
if self .tree .elementInScope ("p" ):
805
809
self .endTagP ("p" )
806
810
self .tree .insertElement (name , attributes )
807
- if name == "pre" :
811
+ if name in ( "pre" , "listing" ) :
808
812
self .processSpaceCharacters = self .processSpaceCharactersDropNewline
809
813
810
814
def startTagForm (self , name , attributes ):
@@ -902,7 +906,7 @@ def startTagButton(self, name, attributes):
902
906
self .tree .insertElement (name , attributes )
903
907
self .tree .activeFormattingElements .append (Marker )
904
908
905
- def startTagMarqueeObject (self , name , attributes ):
909
+ def startTagAppletMarqueeObject (self , name , attributes ):
906
910
self .tree .reconstructActiveFormattingElements ()
907
911
self .tree .insertElement (name , attributes )
908
912
self .tree .activeFormattingElements .append (Marker )
@@ -1201,7 +1205,7 @@ def endTagFormatting(self, name):
1201
1205
self .tree .openElements .insert (
1202
1206
self .tree .openElements .index (furthestBlock ) + 1 , clone )
1203
1207
1204
- def endTagButtonMarqueeObject (self , name ):
1208
+ def endTagAppletButtonMarqueeObject (self , name ):
1205
1209
if self .tree .elementInScope (name ):
1206
1210
self .tree .generateImpliedEndTags ()
1207
1211
if self .tree .openElements [- 1 ].name != name :
@@ -1269,12 +1273,15 @@ def __init__(self, parser, tree):
1269
1273
("col" , self .startTagCol ),
1270
1274
(("tbody" , "tfoot" , "thead" ), self .startTagRowGroup ),
1271
1275
(("td" , "th" , "tr" ), self .startTagImplyTbody ),
1272
- ("table" , self .startTagTable )
1276
+ ("table" , self .startTagTable ),
1277
+ (("style" , "script" ), self .startTagStyleScript ),
1278
+ ("input" , self .startTagInput )
1273
1279
])
1274
1280
self .startTagHandler .default = self .startTagOther
1275
1281
1276
1282
self .endTagHandler = utils .MethodDispatcher ([
1277
1283
("table" , self .endTagTable ),
1284
+ (("style" , "script" ), self .endTagStyleScript ),
1278
1285
(("body" , "caption" , "col" , "colgroup" , "html" , "tbody" , "td" ,
1279
1286
"tfoot" , "th" , "thead" , "tr" ), self .endTagIgnore )
1280
1287
])
@@ -1289,14 +1296,30 @@ def clearStackToTableContext(self):
1289
1296
self .tree .openElements .pop ()
1290
1297
# When the current node is <html> it's an innerHTML case
1291
1298
1299
def getCurrentTable(self):
    """Return the innermost open ``table`` element.

    The stack of open elements is searched from the current node (the
    last entry) towards the root, and the first node named "table" is
    returned.

    If no table is open, the bottom-most entry of the stack is returned
    instead. The previous implementation kept decrementing its index in
    that situation and raised IndexError. The stack can presumably hold
    only the root element in the innerHTML case.

    Raises:
        IndexError: if the stack of open elements is empty.
    """
    openElements = self.tree.openElements
    for node in reversed(openElements):
        if node.name == "table":
            return node
    # NOTE(review): callers read ``._flags`` on the result; this assumes
    # the root element carries that attribute too -- confirm.
    return openElements[0]
1304
+
1292
1305
# processing methods
1306
def processSpaceCharacters(self, data):
    """Handle whitespace-only character data.

    If the current table is flagged "tainted", the data is treated like
    any other character data via ``processCharacters``; otherwise it is
    inserted directly as text.
    """
    tableFlags = self.getCurrentTable()._flags
    if "tainted" in tableFlags:
        self.processCharacters(data)
        return
    self.tree.insertText(data)
1311
+
1293
1312
def processCharacters(self, data):
    """Handle non-whitespace character data.

    Inside an open ``style`` or ``script`` element the data is inserted
    as text. Otherwise the current table is flagged "tainted" (reporting
    "unexpected-char-implies-table-voodoo" the first time only) and the
    data is processed by the "inBody" phase with ``tree.insertFromTable``
    switched on for the duration of that call.
    """
    tree = self.tree
    if tree.openElements[-1].name in ("style", "script"):
        tree.insertText(data)
        return

    if "tainted" not in self.getCurrentTable()._flags:
        # First offending token for this table: report once, then mark
        # the table so later tokens are handled silently.
        self.parser.parseError("unexpected-char-implies-table-voodoo")
        self.getCurrentTable()._flags.append("tainted")

    # Do the table magic!
    tree.insertFromTable = True
    self.parser.phases["inBody"].processCharacters(data)
    tree.insertFromTable = False
1300
1323
1301
1324
def startTagCaption (self , name , attributes ):
1302
1325
self .clearStackToTableContext ()
@@ -1329,12 +1352,27 @@ def startTagTable(self, name, attributes):
1329
1352
if not self .parser .innerHTML :
1330
1353
self .parser .phase .processStartTag (name , attributes )
1331
1354
1355
def startTagStyleScript(self, name, attributes):
    """Handle a ``style`` or ``script`` start tag.

    If the current table is flagged "tainted", the tag is handled by
    ``startTagOther``; otherwise it is delegated to the "inHead" phase.
    """
    tableFlags = self.getCurrentTable()._flags
    if "tainted" in tableFlags:
        self.startTagOther(name, attributes)
        return
    inHeadPhase = self.parser.phases["inHead"]
    inHeadPhase.processStartTag(name, attributes)
1360
+
1361
def startTagInput(self, name, attributes):
    """Handle an ``input`` start tag.

    An input whose ``type`` attribute lower-cases to "hidden" is, while
    the current table is not flagged "tainted", reported as a parse
    error, inserted, and immediately popped off the stack of open
    elements. Every other input is handled by ``startTagOther``.
    """
    # Kept as one short-circuiting expression: the attribute lookup must
    # not run when "type" is absent, and the table is only consulted for
    # hidden inputs.
    untaintedHiddenInput = (
        "type" in attributes and
        attributes["type"].translate(asciiUpper2Lower) == "hidden" and
        "tainted" not in self.getCurrentTable()._flags)
    if not untaintedHiddenInput:
        self.startTagOther(name, attributes)
        return

    # NOTE(review): "unpexted" looks like a misspelling of "unexpected";
    # left unchanged because the error-message table is not visible
    # here -- confirm before renaming the code.
    self.parser.parseError("unpexted-hidden-input-in-table")
    self.tree.insertElement(name, attributes)
    # XXX associate with form
    self.tree.openElements.pop()
1369
+
1332
1370
def startTagOther(self, name, attributes):
    """Handle any start tag without a dedicated handler in this phase.

    The current table is flagged "tainted" (reporting
    "unexpected-start-tag-implies-table-voodoo" the first time only) and
    the tag is processed by the "inBody" phase with
    ``tree.insertFromTable`` switched on for the duration of that call.
    """
    if "tainted" not in self.getCurrentTable()._flags:
        # First offending token for this table: report once, then mark
        # the table so later tokens are handled silently.
        errorData = {"name": name}
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo",
                               errorData)
        self.getCurrentTable()._flags.append("tainted")

    # Do the table magic!
    tree = self.tree
    tree.insertFromTable = True
    self.parser.phases["inBody"].processStartTag(name, attributes)
    tree.insertFromTable = False
1340
1378
@@ -1354,15 +1392,21 @@ def endTagTable(self, name):
1354
1392
assert self .parser .innerHTML
1355
1393
self .parser .parseError ()
1356
1394
1395
def endTagStyleScript(self, name):
    """Handle a ``style`` or ``script`` end tag.

    If the current table is flagged "tainted", the tag is handled by
    ``endTagOther``; otherwise it is delegated to the "inHead" phase.
    """
    tableFlags = self.getCurrentTable()._flags
    if "tainted" in tableFlags:
        self.endTagOther(name)
        return
    inHeadPhase = self.parser.phases["inHead"]
    inHeadPhase.processEndTag(name)
1400
+
1357
1401
def endTagIgnore(self, name):
    """Report an "unexpected-end-tag" parse error and otherwise ignore the tag."""
    errorData = {"name": name}
    self.parser.parseError("unexpected-end-tag", errorData)
1359
1403
1360
1404
def endTagOther(self, name):
    """Handle any end tag without a dedicated handler in this phase.

    The current table is flagged "tainted" (reporting
    "unexpected-end-tag-implies-table-voodoo" the first time only) and
    the tag is processed by the "inBody" phase with
    ``tree.insertFromTable`` switched on for the duration of that call.
    """
    if "tainted" not in self.getCurrentTable()._flags:
        # First offending token for this table: report once, then mark
        # the table so later tokens are handled silently.
        errorData = {"name": name}
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo",
                               errorData)
        self.getCurrentTable()._flags.append("tainted")

    # Do the table magic!
    tree = self.tree
    tree.insertFromTable = True
    self.parser.phases["inBody"].processEndTag(name)
    tree.insertFromTable = False
1368
1412
@@ -1749,7 +1793,8 @@ def __init__(self, parser, tree):
1749
1793
("html" , self .startTagHtml ),
1750
1794
("option" , self .startTagOption ),
1751
1795
("optgroup" , self .startTagOptgroup ),
1752
- ("select" , self .startTagSelect )
1796
+ ("select" , self .startTagSelect ),
1797
+ ("input" , self .startTagInput )
1753
1798
])
1754
1799
self .startTagHandler .default = self .startTagOther
1755
1800
@@ -1783,6 +1828,11 @@ def startTagSelect(self, name, attributes):
1783
1828
self .parser .parseError ("unexpected-select-in-select" )
1784
1829
self .endTagSelect ("select" )
1785
1830
1831
def startTagInput(self, name, attributes):
    """Handle an ``input`` start tag seen in this phase.

    Reports "unexpected-input-in-select", runs ``endTagSelect`` as if a
    ``</select>`` end tag had been seen, then hands the input tag to
    whichever phase is current afterwards.
    """
    parser = self.parser
    parser.parseError("unexpected-input-in-select")
    self.endTagSelect("select")
    # Read the phase only now: endTagSelect may have replaced it.
    currentPhase = parser.phase
    currentPhase.processStartTag(name, attributes)
1835
+
1786
1836
def startTagOther (self , name , attributes ):
1787
1837
self .parser .parseError ("unexpected-start-tag-in-select" ,
1788
1838
{"name" : name })
0 commit comments