@@ -30,7 +30,7 @@ def __init__(self, data=None):
30
30
31
31
class Doctype (Token ):
32
32
def __init__ (self , name , public_id , system_id , correct ):
33
- self .name = name
33
+ self .name = name . translate ( asciiUpper2Lower )
34
34
self .public_id = public_id
35
35
self .system_id = system_id
36
36
self .correct = correct
@@ -44,7 +44,7 @@ class SpaceCharacters(Token):
44
44
45
45
class Tag (Token ):
46
46
def __init__ (self , name , attributes ):
47
- self .name = name
47
+ self .name = name . translate ( asciiUpper2Lower )
48
48
self .attributes = attributeMap (attributes or {})
49
49
self .self_closing = False
50
50
self .attribute_name = ""
@@ -278,7 +278,6 @@ def emitCurrentToken(self):
278
278
token = self .currentToken
279
279
# Add token to the queue to be yielded
280
280
if isinstance (token , Tag ):
281
- token .name = token .name .translate (asciiUpper2Lower )
282
281
if self .currentToken .attribute_name in self .currentToken .attributes :
283
282
self .tokenQueue .append (ParseError ("duplicate-attribute" ))
284
283
token .clearAttribute ()
@@ -456,7 +455,7 @@ def tagNameState(self):
456
455
self .tokenQueue .append (ParseError ("invalid-codepoint" ))
457
456
self .currentToken .name += "\uFFFD "
458
457
else :
459
- self .currentToken .name += data
458
+ self .currentToken .name += data . translate ( asciiUpper2Lower )
460
459
# (Don't use charsUntil here, because tag names are
461
460
# very short and it's faster to not do anything fancy)
462
461
return True
@@ -475,7 +474,7 @@ def rcdataLessThanSignState(self):
475
474
def rcdataEndTagOpenState (self ):
476
475
data = self .stream .char ()
477
476
if data in asciiLetters :
478
- self .temporaryBuffer += data
477
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
479
478
self .state = self .rcdataEndTagNameState
480
479
else :
481
480
self .tokenQueue .append (Characters ("</" ))
@@ -484,7 +483,7 @@ def rcdataEndTagOpenState(self):
484
483
return True
485
484
486
485
def rcdataEndTagNameState (self ):
487
- appropriate = self .currentToken and self . currentToken . name . lower () == self .temporaryBuffer . lower ()
486
+ appropriate = self .currentToken . name == self .temporaryBuffer
488
487
data = self .stream .char ()
489
488
if data in spaceCharacters and appropriate :
490
489
self .currentToken = EndTag (name = self .temporaryBuffer )
@@ -497,7 +496,7 @@ def rcdataEndTagNameState(self):
497
496
self .emitCurrentToken ()
498
497
self .state = self .dataState
499
498
elif data in asciiLetters :
500
- self .temporaryBuffer += data
499
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
501
500
else :
502
501
self .tokenQueue .append (Characters ("</" + self .temporaryBuffer ))
503
502
self .stream .unget (data )
@@ -518,7 +517,7 @@ def rawtextLessThanSignState(self):
518
517
def rawtextEndTagOpenState (self ):
519
518
data = self .stream .char ()
520
519
if data in asciiLetters :
521
- self .temporaryBuffer += data
520
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
522
521
self .state = self .rawtextEndTagNameState
523
522
else :
524
523
self .tokenQueue .append (Characters ("</" ))
@@ -527,7 +526,7 @@ def rawtextEndTagOpenState(self):
527
526
return True
528
527
529
528
def rawtextEndTagNameState (self ):
530
- appropriate = self .currentToken and self . currentToken . name . lower () == self .temporaryBuffer . lower ()
529
+ appropriate = self .currentToken . name == self .temporaryBuffer
531
530
data = self .stream .char ()
532
531
if data in spaceCharacters and appropriate :
533
532
self .currentToken = EndTag (name = self .temporaryBuffer )
@@ -540,7 +539,7 @@ def rawtextEndTagNameState(self):
540
539
self .emitCurrentToken ()
541
540
self .state = self .dataState
542
541
elif data in asciiLetters :
543
- self .temporaryBuffer += data
542
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
544
543
else :
545
544
self .tokenQueue .append (Characters ("</" + self .temporaryBuffer ))
546
545
self .stream .unget (data )
@@ -564,7 +563,7 @@ def scriptDataLessThanSignState(self):
564
563
def scriptDataEndTagOpenState (self ):
565
564
data = self .stream .char ()
566
565
if data in asciiLetters :
567
- self .temporaryBuffer += data
566
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
568
567
self .state = self .scriptDataEndTagNameState
569
568
else :
570
569
self .tokenQueue .append (Characters ("</" ))
@@ -573,7 +572,7 @@ def scriptDataEndTagOpenState(self):
573
572
return True
574
573
575
574
def scriptDataEndTagNameState (self ):
576
- appropriate = self .currentToken and self . currentToken . name . lower () == self .temporaryBuffer . lower ()
575
+ appropriate = self .currentToken . name == self .temporaryBuffer
577
576
data = self .stream .char ()
578
577
if data in spaceCharacters and appropriate :
579
578
self .currentToken = EndTag (name = self .temporaryBuffer )
@@ -586,7 +585,7 @@ def scriptDataEndTagNameState(self):
586
585
self .emitCurrentToken ()
587
586
self .state = self .dataState
588
587
elif data in asciiLetters :
589
- self .temporaryBuffer += data
588
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
590
589
else :
591
590
self .tokenQueue .append (Characters ("</" + self .temporaryBuffer ))
592
591
self .stream .unget (data )
@@ -675,7 +674,7 @@ def scriptDataEscapedLessThanSignState(self):
675
674
self .state = self .scriptDataEscapedEndTagOpenState
676
675
elif data in asciiLetters :
677
676
self .tokenQueue .append (Characters ("<" + data ))
678
- self .temporaryBuffer = data
677
+ self .temporaryBuffer = data . translate ( asciiUpper2Lower )
679
678
self .state = self .scriptDataDoubleEscapeStartState
680
679
else :
681
680
self .tokenQueue .append (Characters ("<" ))
@@ -686,7 +685,7 @@ def scriptDataEscapedLessThanSignState(self):
686
685
def scriptDataEscapedEndTagOpenState (self ):
687
686
data = self .stream .char ()
688
687
if data in asciiLetters :
689
- self .temporaryBuffer = data
688
+ self .temporaryBuffer = data . translate ( asciiUpper2Lower )
690
689
self .state = self .scriptDataEscapedEndTagNameState
691
690
else :
692
691
self .tokenQueue .append (Characters ("</" ))
@@ -695,7 +694,7 @@ def scriptDataEscapedEndTagOpenState(self):
695
694
return True
696
695
697
696
def scriptDataEscapedEndTagNameState (self ):
698
- appropriate = self .currentToken and self . currentToken . name . lower () == self .temporaryBuffer . lower ()
697
+ appropriate = self .currentToken . name == self .temporaryBuffer
699
698
data = self .stream .char ()
700
699
if data in spaceCharacters and appropriate :
701
700
self .currentToken = EndTag (name = self .temporaryBuffer )
@@ -708,7 +707,7 @@ def scriptDataEscapedEndTagNameState(self):
708
707
self .emitCurrentToken ()
709
708
self .state = self .dataState
710
709
elif data in asciiLetters :
711
- self .temporaryBuffer += data
710
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
712
711
else :
713
712
self .tokenQueue .append (Characters ("</" + self .temporaryBuffer ))
714
713
self .stream .unget (data )
@@ -719,13 +718,13 @@ def scriptDataDoubleEscapeStartState(self):
719
718
data = self .stream .char ()
720
719
if data in (spaceCharacters | frozenset (("/" , ">" ))):
721
720
self .tokenQueue .append (Characters (data ))
722
- if self .temporaryBuffer . lower () == "script" :
721
+ if self .temporaryBuffer == "script" :
723
722
self .state = self .scriptDataDoubleEscapedState
724
723
else :
725
724
self .state = self .scriptDataEscapedState
726
725
elif data in asciiLetters :
727
726
self .tokenQueue .append (Characters (data ))
728
- self .temporaryBuffer += data
727
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
729
728
else :
730
729
self .stream .unget (data )
731
730
self .state = self .scriptDataEscapedState
@@ -806,13 +805,13 @@ def scriptDataDoubleEscapeEndState(self):
806
805
data = self .stream .char ()
807
806
if data in (spaceCharacters | frozenset (("/" , ">" ))):
808
807
self .tokenQueue .append (Characters (data ))
809
- if self .temporaryBuffer . lower () == "script" :
808
+ if self .temporaryBuffer == "script" :
810
809
self .state = self .scriptDataEscapedState
811
810
else :
812
811
self .state = self .scriptDataDoubleEscapedState
813
812
elif data in asciiLetters :
814
813
self .tokenQueue .append (Characters (data ))
815
- self .temporaryBuffer += data
814
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
816
815
else :
817
816
self .stream .unget (data )
818
817
self .state = self .scriptDataDoubleEscapedState
@@ -1240,10 +1239,8 @@ def beforeDoctypeNameState(self):
1240
1239
def doctypeNameState (self ):
1241
1240
data = self .stream .char ()
1242
1241
if data in spaceCharacters :
1243
- self .currentToken .name = self .currentToken .name .translate (asciiUpper2Lower )
1244
1242
self .state = self .afterDoctypeNameState
1245
1243
elif data == ">" :
1246
- self .currentToken .name = self .currentToken .name .translate (asciiUpper2Lower )
1247
1244
self .tokenQueue .append (self .currentToken )
1248
1245
self .state = self .dataState
1249
1246
elif data == "\u0000 " :
@@ -1253,11 +1250,10 @@ def doctypeNameState(self):
1253
1250
elif data is EOF :
1254
1251
self .tokenQueue .append (ParseError ("eof-in-doctype-name" ))
1255
1252
self .currentToken .correct = False
1256
- self .currentToken .name = self .currentToken .name .translate (asciiUpper2Lower )
1257
1253
self .tokenQueue .append (self .currentToken )
1258
1254
self .state = self .dataState
1259
1255
else :
1260
- self .currentToken .name += data
1256
+ self .currentToken .name += data . translate ( asciiUpper2Lower )
1261
1257
return True
1262
1258
1263
1259
def afterDoctypeNameState (self ):
0 commit comments