@@ -460,152 +460,133 @@ def processEndTag(self, token):
460
460
self .endTagHandler [token ["name" ]](token )
461
461
462
462
class InitialPhase(Phase):
    """Phase that handles tokens seen before any DOCTYPE has been processed.

    A DOCTYPE token is inserted into the tree and inspected to decide
    whether ``parser.compatMode`` becomes "quirks" or "limited quirks".
    Character, start-tag, end-tag and EOF tokens report a parse error,
    force "quirks" mode, and are re-dispatched to the "beforeHtml" phase.
    Whitespace is ignored and comments are attached to the document.
    """

    # Lower-cased public identifier prefixes that select quirks mode.
    _quirksPublicIdPrefixes = (
        "+//silmaril//dtd html pro v0r11 19970101//",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
        "-//as//dtd html 3.0 aswedit + extensions//",
        "-//ietf//dtd html 2.0 level 1//",
        "-//ietf//dtd html 2.0 level 2//",
        "-//ietf//dtd html 2.0 strict level 1//",
        "-//ietf//dtd html 2.0 strict level 2//",
        "-//ietf//dtd html 2.0 strict//",
        "-//ietf//dtd html 2.0//",
        "-//ietf//dtd html 2.1e//",
        "-//ietf//dtd html 3.0//",
        "-//ietf//dtd html 3.2 final//",
        "-//ietf//dtd html 3.2//",
        "-//ietf//dtd html 3//",
        "-//ietf//dtd html level 0//",
        "-//ietf//dtd html level 1//",
        "-//ietf//dtd html level 2//",
        "-//ietf//dtd html level 3//",
        "-//ietf//dtd html strict level 0//",
        "-//ietf//dtd html strict level 1//",
        "-//ietf//dtd html strict level 2//",
        "-//ietf//dtd html strict level 3//",
        "-//ietf//dtd html strict//",
        "-//ietf//dtd html//",
        "-//metrius//dtd metrius presentational//",
        "-//microsoft//dtd internet explorer 2.0 html strict//",
        "-//microsoft//dtd internet explorer 2.0 html//",
        "-//microsoft//dtd internet explorer 2.0 tables//",
        "-//microsoft//dtd internet explorer 3.0 html strict//",
        "-//microsoft//dtd internet explorer 3.0 html//",
        "-//microsoft//dtd internet explorer 3.0 tables//",
        "-//netscape comm. corp.//dtd html//",
        "-//netscape comm. corp.//dtd strict html//",
        "-//o'reilly and associates//dtd html 2.0//",
        "-//o'reilly and associates//dtd html extended 1.0//",
        "-//o'reilly and associates//dtd html extended relaxed 1.0//",
        "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
        "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
        "-//spyglass//dtd html 2.0 extended//",
        "-//sq//dtd html 2.0 hotmetal + extensions//",
        "-//sun microsystems corp.//dtd hotjava html//",
        "-//sun microsystems corp.//dtd hotjava strict html//",
        "-//w3c//dtd html 3 1995-03-24//",
        "-//w3c//dtd html 3.2 draft//",
        "-//w3c//dtd html 3.2 final//",
        "-//w3c//dtd html 3.2//",
        "-//w3c//dtd html 3.2s draft//",
        "-//w3c//dtd html 4.0 frameset//",
        "-//w3c//dtd html 4.0 transitional//",
        "-//w3c//dtd html experimental 19960712//",
        "-//w3c//dtd html experimental 970421//",
        "-//w3c//dtd w3 html//",
        "-//w3o//dtd w3 html 3.0//",
        "-//webtechs//dtd mozilla html 2.0//",
        "-//webtechs//dtd mozilla html//",
    )

    # Lower-cased public identifiers that select quirks mode only on an
    # exact match.
    _quirksPublicIds = (
        "-//w3o//dtd w3 html strict 3.0//en//",
        "-/w3c/dtd html 4.0 transitional/en",
        "html",
    )

    # HTML 4.01 prefixes: quirks when the system identifier is missing,
    # limited quirks when one is present.
    _html401PublicIdPrefixes = (
        "-//w3c//dtd html 4.01 frameset//",
        "-//w3c//dtd html 4.01 transitional//",
    )

    # XHTML 1.0 prefixes that always select limited quirks mode.
    _limitedQuirksPublicIdPrefixes = (
        "-//w3c//dtd xhtml 1.0 frameset//",
        "-//w3c//dtd xhtml 1.0 transitional//",
    )

    # System identifier (compared lower-cased) that selects quirks mode.
    _quirksSystemId = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

    def processSpaceCharacters(self, token):
        """Ignore whitespace that appears before the DOCTYPE."""
        pass

    def processComment(self, token):
        """Insert the comment as a child of the document node."""
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the DOCTYPE and derive the parser's compatibility mode.

        ``token`` is read through the keys "name", "publicId", "systemId"
        and "correct". A parse error is reported for any doctype other
        than a plain ``html`` one (optionally carrying the
        "about:legacy-compat" system identifier). The parser always moves
        on to the "beforeHtml" phase afterwards.
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        if (name != "html" or publicId is not None or
                (systemId is not None and
                 systemId != "about:legacy-compat")):
            self.parser.parseError("unknown-doctype")

        # Normalise a missing public identifier so the string tests below
        # can run unconditionally. systemId deliberately stays None when
        # absent: the HTML 4.01 rules distinguish missing from present.
        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        hasHtml401PublicId = publicId.startswith(
            self._html401PublicIdPrefixes)

        if (not correct or name != "html"
                or publicId.startswith(self._quirksPublicIdPrefixes)
                or publicId in self._quirksPublicIds
                or (hasHtml401PublicId and systemId is None)
                or (systemId and
                    systemId.lower() == self._quirksSystemId)):
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(self._limitedQuirksPublicIdPrefixes)
                or (hasHtml401PublicId and systemId is not None)):
            self.parser.compatMode = "limited quirks"
        # Otherwise compatMode is left at whatever value it already had.

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        """Fall back to quirks mode and switch to the "beforeHtml" phase.

        Shared by every handler that receives a token other than a
        DOCTYPE, comment or whitespace while still in this phase.
        """
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]

    def processCharacters(self, token):
        """Report the missing DOCTYPE, then reprocess the characters."""
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        # self.parser.phase now refers to the "beforeHtml" phase.
        self.parser.phase.processCharacters(token)

    def processStartTag(self, token):
        """Report the missing DOCTYPE, then reprocess the start tag."""
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               {"name": token["name"]})
        self.anythingElse()
        self.parser.phase.processStartTag(token)

    def processEndTag(self, token):
        """Report the missing DOCTYPE, then reprocess the end tag."""
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               {"name": token["name"]})
        self.anythingElse()
        self.parser.phase.processEndTag(token)

    def processEOF(self):
        """Report the missing DOCTYPE, then let the next phase handle EOF."""
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        self.parser.phase.processEOF()
609
590
610
591
611
592
class BeforeHtmlPhase (Phase ):
0 commit comments