@@ -233,7 +233,6 @@ def emitCurrentToken(self):
233
233
token = self .currentToken
234
234
# Add token to the queue to be yielded
235
235
if (token ["type" ] in tagTokenTypes ):
236
- token ["name" ] = token ["name" ].translate (asciiUpper2Lower )
237
236
if token ["type" ] == tokenTypes ["StartTag" ]:
238
237
raw = token ["data" ]
239
238
data = attributeMap (raw )
@@ -380,7 +379,8 @@ def tagOpenState(self):
380
379
self .state = self .closeTagOpenState
381
380
elif data in asciiLetters :
382
381
self .currentToken = {"type" : tokenTypes ["StartTag" ],
383
- "name" : data , "data" : [],
382
+ "name" : data .translate (asciiUpper2Lower ),
383
+ "data" : [],
384
384
"selfClosing" : False ,
385
385
"selfClosingAcknowledged" : False }
386
386
self .state = self .tagNameState
@@ -410,7 +410,8 @@ def tagOpenState(self):
410
410
def closeTagOpenState (self ):
411
411
data = self .stream .char ()
412
412
if data in asciiLetters :
413
- self .currentToken = {"type" : tokenTypes ["EndTag" ], "name" : data ,
413
+ self .currentToken = {"type" : tokenTypes ["EndTag" ],
414
+ "name" : data .translate (asciiUpper2Lower ),
414
415
"data" : [], "selfClosing" : False }
415
416
self .state = self .tagNameState
416
417
elif data == ">" :
@@ -448,7 +449,7 @@ def tagNameState(self):
448
449
"data" : "invalid-codepoint" })
449
450
self .currentToken ["name" ] += "\uFFFD "
450
451
else :
451
- self .currentToken ["name" ] += data
452
+ self .currentToken ["name" ] += data . translate ( asciiUpper2Lower )
452
453
# (Don't use charsUntil here, because tag names are
453
454
# very short and it's faster to not do anything fancy)
454
455
return True
@@ -467,7 +468,7 @@ def rcdataLessThanSignState(self):
467
468
def rcdataEndTagOpenState (self ):
468
469
data = self .stream .char ()
469
470
if data in asciiLetters :
470
- self .temporaryBuffer += data
471
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
471
472
self .state = self .rcdataEndTagNameState
472
473
else :
473
474
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : "</" })
@@ -476,7 +477,7 @@ def rcdataEndTagOpenState(self):
476
477
return True
477
478
478
479
def rcdataEndTagNameState (self ):
479
- appropriate = self .currentToken and self . currentToken ["name" ]. lower () == self .temporaryBuffer . lower ()
480
+ appropriate = self .currentToken ["name" ] == self .temporaryBuffer
480
481
data = self .stream .char ()
481
482
if data in spaceCharacters and appropriate :
482
483
self .currentToken = {"type" : tokenTypes ["EndTag" ],
@@ -495,7 +496,7 @@ def rcdataEndTagNameState(self):
495
496
self .emitCurrentToken ()
496
497
self .state = self .dataState
497
498
elif data in asciiLetters :
498
- self .temporaryBuffer += data
499
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
499
500
else :
500
501
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ],
501
502
"data" : "</" + self .temporaryBuffer })
@@ -517,7 +518,7 @@ def rawtextLessThanSignState(self):
517
518
def rawtextEndTagOpenState (self ):
518
519
data = self .stream .char ()
519
520
if data in asciiLetters :
520
- self .temporaryBuffer += data
521
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
521
522
self .state = self .rawtextEndTagNameState
522
523
else :
523
524
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : "</" })
@@ -526,7 +527,7 @@ def rawtextEndTagOpenState(self):
526
527
return True
527
528
528
529
def rawtextEndTagNameState (self ):
529
- appropriate = self .currentToken and self . currentToken ["name" ]. lower () == self .temporaryBuffer . lower ()
530
+ appropriate = self .currentToken ["name" ] == self .temporaryBuffer
530
531
data = self .stream .char ()
531
532
if data in spaceCharacters and appropriate :
532
533
self .currentToken = {"type" : tokenTypes ["EndTag" ],
@@ -545,7 +546,7 @@ def rawtextEndTagNameState(self):
545
546
self .emitCurrentToken ()
546
547
self .state = self .dataState
547
548
elif data in asciiLetters :
548
- self .temporaryBuffer += data
549
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
549
550
else :
550
551
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ],
551
552
"data" : "</" + self .temporaryBuffer })
@@ -570,7 +571,7 @@ def scriptDataLessThanSignState(self):
570
571
def scriptDataEndTagOpenState (self ):
571
572
data = self .stream .char ()
572
573
if data in asciiLetters :
573
- self .temporaryBuffer += data
574
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
574
575
self .state = self .scriptDataEndTagNameState
575
576
else :
576
577
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : "</" })
@@ -579,7 +580,7 @@ def scriptDataEndTagOpenState(self):
579
580
return True
580
581
581
582
def scriptDataEndTagNameState (self ):
582
- appropriate = self .currentToken and self . currentToken ["name" ]. lower () == self .temporaryBuffer . lower ()
583
+ appropriate = self .currentToken ["name" ] == self .temporaryBuffer
583
584
data = self .stream .char ()
584
585
if data in spaceCharacters and appropriate :
585
586
self .currentToken = {"type" : tokenTypes ["EndTag" ],
@@ -598,7 +599,7 @@ def scriptDataEndTagNameState(self):
598
599
self .emitCurrentToken ()
599
600
self .state = self .dataState
600
601
elif data in asciiLetters :
601
- self .temporaryBuffer += data
602
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
602
603
else :
603
604
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ],
604
605
"data" : "</" + self .temporaryBuffer })
@@ -695,7 +696,7 @@ def scriptDataEscapedLessThanSignState(self):
695
696
self .state = self .scriptDataEscapedEndTagOpenState
696
697
elif data in asciiLetters :
697
698
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : "<" + data })
698
- self .temporaryBuffer = data
699
+ self .temporaryBuffer = data . translate ( asciiUpper2Lower )
699
700
self .state = self .scriptDataDoubleEscapeStartState
700
701
else :
701
702
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : "<" })
@@ -706,7 +707,7 @@ def scriptDataEscapedLessThanSignState(self):
706
707
def scriptDataEscapedEndTagOpenState (self ):
707
708
data = self .stream .char ()
708
709
if data in asciiLetters :
709
- self .temporaryBuffer = data
710
+ self .temporaryBuffer = data . translate ( asciiUpper2Lower )
710
711
self .state = self .scriptDataEscapedEndTagNameState
711
712
else :
712
713
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : "</" })
@@ -715,7 +716,7 @@ def scriptDataEscapedEndTagOpenState(self):
715
716
return True
716
717
717
718
def scriptDataEscapedEndTagNameState (self ):
718
- appropriate = self .currentToken and self . currentToken ["name" ]. lower () == self .temporaryBuffer . lower ()
719
+ appropriate = self .currentToken ["name" ] == self .temporaryBuffer
719
720
data = self .stream .char ()
720
721
if data in spaceCharacters and appropriate :
721
722
self .currentToken = {"type" : tokenTypes ["EndTag" ],
@@ -734,7 +735,7 @@ def scriptDataEscapedEndTagNameState(self):
734
735
self .emitCurrentToken ()
735
736
self .state = self .dataState
736
737
elif data in asciiLetters :
737
- self .temporaryBuffer += data
738
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
738
739
else :
739
740
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ],
740
741
"data" : "</" + self .temporaryBuffer })
@@ -746,13 +747,13 @@ def scriptDataDoubleEscapeStartState(self):
746
747
data = self .stream .char ()
747
748
if data in (spaceCharacters | frozenset (("/" , ">" ))):
748
749
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : data })
749
- if self .temporaryBuffer . lower () == "script" :
750
+ if self .temporaryBuffer == "script" :
750
751
self .state = self .scriptDataDoubleEscapedState
751
752
else :
752
753
self .state = self .scriptDataEscapedState
753
754
elif data in asciiLetters :
754
755
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : data })
755
- self .temporaryBuffer += data
756
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
756
757
else :
757
758
self .stream .unget (data )
758
759
self .state = self .scriptDataEscapedState
@@ -842,13 +843,13 @@ def scriptDataDoubleEscapeEndState(self):
842
843
data = self .stream .char ()
843
844
if data in (spaceCharacters | frozenset (("/" , ">" ))):
844
845
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : data })
845
- if self .temporaryBuffer . lower () == "script" :
846
+ if self .temporaryBuffer == "script" :
846
847
self .state = self .scriptDataEscapedState
847
848
else :
848
849
self .state = self .scriptDataDoubleEscapedState
849
850
elif data in asciiLetters :
850
851
self .tokenQueue .append ({"type" : tokenTypes ["Characters" ], "data" : data })
851
- self .temporaryBuffer += data
852
+ self .temporaryBuffer += data . translate ( asciiUpper2Lower )
852
853
else :
853
854
self .stream .unget (data )
854
855
self .state = self .scriptDataDoubleEscapedState
@@ -859,7 +860,8 @@ def beforeAttributeNameState(self):
859
860
if data in spaceCharacters :
860
861
self .stream .charsUntil (spaceCharacters , True )
861
862
elif data in asciiLetters :
862
- self .currentToken ["data" ].append ([data , "" ])
863
+ attr_name = data .translate (asciiUpper2Lower )
864
+ self .currentToken ["data" ].append ([attr_name , "" ])
863
865
self .state = self .attributeNameState
864
866
elif data == ">" :
865
867
self .emitCurrentToken ()
@@ -891,7 +893,7 @@ def attributeNameState(self):
891
893
if data == "=" :
892
894
self .state = self .beforeAttributeValueState
893
895
elif data in asciiLetters :
894
- self .currentToken ["data" ][- 1 ][0 ] += data
896
+ self .currentToken ["data" ][- 1 ][0 ] += data . translate ( asciiUpper2Lower )
895
897
leavingThisState = False
896
898
elif data == ">" :
897
899
# XXX If we emit here the attributes are converted to a dict
@@ -918,15 +920,13 @@ def attributeNameState(self):
918
920
"data" : "eof-in-attribute-name" })
919
921
self .state = self .dataState
920
922
else :
921
- self .currentToken ["data" ][- 1 ][0 ] += data
923
+ self .currentToken ["data" ][- 1 ][0 ] += data . translate ( asciiUpper2Lower )
922
924
leavingThisState = False
923
925
924
926
if leavingThisState :
925
927
# Attributes are not dropped at this stage. That happens when the
926
928
# start tag token is emitted so values can still be safely appended
927
929
# to attributes, but we do want to report the parse error in time.
928
- self .currentToken ["data" ][- 1 ][0 ] = (
929
- self .currentToken ["data" ][- 1 ][0 ].translate (asciiUpper2Lower ))
930
930
for name , _ in self .currentToken ["data" ][:- 1 ]:
931
931
if self .currentToken ["data" ][- 1 ][0 ] == name :
932
932
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
@@ -946,7 +946,8 @@ def afterAttributeNameState(self):
946
946
elif data == ">" :
947
947
self .emitCurrentToken ()
948
948
elif data in asciiLetters :
949
- self .currentToken ["data" ].append ([data , "" ])
949
+ attr_name = data .translate (asciiUpper2Lower )
950
+ self .currentToken ["data" ].append ([attr_name , "" ])
950
951
self .state = self .attributeNameState
951
952
elif data == "/" :
952
953
self .state = self .selfClosingStartTagState
@@ -1340,17 +1341,15 @@ def beforeDoctypeNameState(self):
1340
1341
self .tokenQueue .append (self .currentToken )
1341
1342
self .state = self .dataState
1342
1343
else :
1343
- self .currentToken ["name" ] = data
1344
+ self .currentToken ["name" ] = data . translate ( asciiUpper2Lower )
1344
1345
self .state = self .doctypeNameState
1345
1346
return True
1346
1347
1347
1348
def doctypeNameState (self ):
1348
1349
data = self .stream .char ()
1349
1350
if data in spaceCharacters :
1350
- self .currentToken ["name" ] = self .currentToken ["name" ].translate (asciiUpper2Lower )
1351
1351
self .state = self .afterDoctypeNameState
1352
1352
elif data == ">" :
1353
- self .currentToken ["name" ] = self .currentToken ["name" ].translate (asciiUpper2Lower )
1354
1353
self .tokenQueue .append (self .currentToken )
1355
1354
self .state = self .dataState
1356
1355
elif data == "\u0000 " :
@@ -1362,11 +1361,10 @@ def doctypeNameState(self):
1362
1361
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
1363
1362
"eof-in-doctype-name" })
1364
1363
self .currentToken ["correct" ] = False
1365
- self .currentToken ["name" ] = self .currentToken ["name" ].translate (asciiUpper2Lower )
1366
1364
self .tokenQueue .append (self .currentToken )
1367
1365
self .state = self .dataState
1368
1366
else :
1369
- self .currentToken ["name" ] += data
1367
+ self .currentToken ["name" ] += data . translate ( asciiUpper2Lower )
1370
1368
return True
1371
1369
1372
1370
def afterDoctypeNameState (self ):
0 commit comments