@@ -51,6 +51,7 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
51
51
"attributeValueDoubleQuoted" :self .attributeValueDoubleQuotedState ,
52
52
"attributeValueSingleQuoted" :self .attributeValueSingleQuotedState ,
53
53
"attributeValueUnQuoted" :self .attributeValueUnQuotedState ,
54
+ "afterAttributeValue" :self .afterAttributeValueState ,
54
55
"bogusComment" :self .bogusCommentState ,
55
56
"markupDeclarationOpen" :self .markupDeclarationOpenState ,
56
57
"commentStart" :self .commentStartState ,
@@ -185,10 +186,11 @@ def consumeNumberEntity(self, isHex):
185
186
186
187
return char
187
188
188
- def consumeEntity (self , fromAttribute = False ):
189
+ def consumeEntity (self , allowedChar = None , fromAttribute = False ):
189
190
char = None
190
191
charStack = [self .stream .char ()]
191
- if charStack [0 ] in spaceCharacters or charStack [0 ] in (EOF , "<" , "&" ):
192
+ if charStack [0 ] in spaceCharacters or charStack [0 ] in (EOF , "<" , "&" )\
193
+ or (allowedChar is not None and allowedChar == charStack [0 ]):
192
194
self .stream .unget (charStack )
193
195
elif charStack [0 ] == u"#" :
194
196
# We might have a number entity here.
@@ -260,10 +262,10 @@ def entitiesStartingWith(name):
260
262
self .stream .unget (charStack )
261
263
return char
262
264
263
- def processEntityInAttribute (self ):
265
+ def processEntityInAttribute (self , allowedChar ):
264
266
"""This method replaces the need for "entityInAttributeValueState".
265
267
"""
266
- entity = self .consumeEntity (True )
268
+ entity = self .consumeEntity (allowedChar = allowedChar , fromAttribute = True )
267
269
if entity :
268
270
self .currentToken ["data" ][- 1 ][1 ] += entity
269
271
else :
@@ -479,6 +481,11 @@ def beforeAttributeNameState(self):
479
481
self .emitCurrentToken ()
480
482
elif data == u"/" :
481
483
self .processSolidusInTag ()
484
+ elif data == u"'" or data == u'"' or data == u"=" :
485
+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
486
+ "invalid-character-in-attribute-name" })
487
+ self .currentToken ["data" ].append ([data , "" ])
488
+ self .state = self .states ["attributeName" ]
482
489
elif data == EOF :
483
490
self .tokenQueue .append ({"type" : "ParseError" , "data" :
484
491
"expected-attribute-name-but-got-eof" })
@@ -508,6 +515,11 @@ def attributeNameState(self):
508
515
elif data == u"/" :
509
516
self .processSolidusInTag ()
510
517
self .state = self .states ["beforeAttributeName" ]
518
+ elif data == u"'" or data == u'"' :
519
+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
520
+ "invalid-character-in-attribute-name" })
521
+ self .currentToken ["data" ][- 1 ][0 ] += data
522
+ leavingThisState = False
511
523
elif data == EOF :
512
524
self .tokenQueue .append ({"type" : "ParseError" , "data" :
513
525
"eof-in-attribute-name" })
@@ -570,6 +582,11 @@ def beforeAttributeValueState(self):
570
582
self .state = self .states ["attributeValueSingleQuoted" ]
571
583
elif data == u">" :
572
584
self .emitCurrentToken ()
585
+ elif data == u"=" :
586
+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
587
+ "equals-in-unquoted-attribute-value" })
588
+ self .currentToken ["data" ][- 1 ][1 ] += data
589
+ self .state = self .states ["attributeValueUnQuoted" ]
573
590
elif data == EOF :
574
591
self .tokenQueue .append ({"type" : "ParseError" , "data" :
575
592
"expected-attribute-value-but-got-eof" })
@@ -582,9 +599,9 @@ def beforeAttributeValueState(self):
582
599
def attributeValueDoubleQuotedState (self ):
583
600
data = self .stream .char ()
584
601
if data == "\" " :
585
- self .state = self .states ["beforeAttributeName " ]
602
+ self .state = self .states ["afterAttributeValue " ]
586
603
elif data == u"&" :
587
- self .processEntityInAttribute ()
604
+ self .processEntityInAttribute (u'"' )
588
605
elif data == EOF :
589
606
self .tokenQueue .append ({"type" : "ParseError" , "data" :
590
607
"eof-in-attribute-value-double-quote" })
@@ -597,9 +614,9 @@ def attributeValueDoubleQuotedState(self):
597
614
def attributeValueSingleQuotedState (self ):
598
615
data = self .stream .char ()
599
616
if data == "'" :
600
- self .state = self .states ["beforeAttributeName " ]
617
+ self .state = self .states ["afterAttributeValue " ]
601
618
elif data == u"&" :
602
- self .processEntityInAttribute ()
619
+ self .processEntityInAttribute (u"'" )
603
620
elif data == EOF :
604
621
self .tokenQueue .append ({"type" : "ParseError" , "data" :
605
622
"eof-in-attribute-value-single-quote" })
@@ -614,16 +631,37 @@ def attributeValueUnQuotedState(self):
614
631
if data in spaceCharacters :
615
632
self .state = self .states ["beforeAttributeName" ]
616
633
elif data == u"&" :
617
- self .processEntityInAttribute ()
634
+ self .processEntityInAttribute (None )
618
635
elif data == u">" :
619
636
self .emitCurrentToken ()
637
+ elif data == u'"' or data == u"'" or data == u"=" :
638
+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
639
+ "unexpected-character-in-unquoted-attribute-value" })
640
+ self .currentToken ["data" ][- 1 ][1 ] += data
620
641
elif data == EOF :
621
642
self .tokenQueue .append ({"type" : "ParseError" , "data" :
622
643
"eof-in-attribute-value-no-quotes" })
623
644
self .emitCurrentToken ()
624
645
else :
625
646
self .currentToken ["data" ][- 1 ][1 ] += data + self .stream .charsUntil ( \
626
- frozenset (("&" , ">" ,"<" )) | spaceCharacters )
647
+ frozenset (("&" , ">" , "<" , "=" , "'" , '"' )) | spaceCharacters )
648
+ return True
649
+
650
def afterAttributeValueState(self):
    """Tokenizer state registered under the "afterAttributeValue" key.

    Reads one character from the stream and decides which state comes
    next:

    * a space character  -> "beforeAttributeName"
    * ">"                -> the current token is emitted, then "data"
    * "/"                -> processSolidusInTag() runs, then
                            "beforeAttributeName"
    * anything else      -> a ParseError token is queued, the character
                            is pushed back onto the stream, then
                            "beforeAttributeName"

    Always returns True.
    """
    char = self.stream.char()

    # Every branch except ">" ends up in the same follow-up state, so that
    # one is used as the default and only overridden where needed.
    nextStateName = "beforeAttributeName"

    if char in spaceCharacters:
        # Nothing to do besides moving on to the default state.
        pass
    elif char == u">":
        self.emitCurrentToken()
        nextStateName = "data"
    elif char == u"/":
        self.processSolidusInTag()
    else:
        # Report the stray character, then hand it back to the stream so
        # the next state sees it again.
        parseError = {
            "type": "ParseError",
            "data": "unexpected-character-after-attribute-value",
        }
        self.tokenQueue.append(parseError)
        self.stream.unget(char)

    # The state table is consulted only after the branch's side effects,
    # matching the order of operations of the branch-local assignments.
    self.state = self.states[nextStateName]
    return True
628
666
629
667
def bogusCommentState (self ):
0 commit comments