@@ -354,15 +354,22 @@ def dataState(self):
354
354
self .tokenQueue .append ({"type" : "SpaceCharacters" , "data" :
355
355
data + self .stream .charsUntil (spaceCharacters , True )})
356
356
# No need to update lastFourChars here, since the first space will
357
- # have already broken any <!-- or --> sequences
357
+ # have already been appended to lastFourChars and will have broken
358
+ # any <!-- or --> sequences
358
359
else :
359
- chars = self .stream .charsUntil (("&" , "<" , ">" , "-" ))
360
- self .tokenQueue .append ({"type" : "Characters" , "data" :
360
+ if self .contentModelFlag in \
361
+ (contentModelFlags ["CDATA" ], contentModelFlags ["RCDATA" ]):
362
+ chars = self .stream .charsUntil ((u"&" , u"<" , u">" , u"-" ))
363
+ self .lastFourChars += chars [- 4 :]
364
+ self .lastFourChars = self .lastFourChars [- 4 :]
365
+ else :
366
+ chars = self .stream .charsUntil ((u"&" , u"<" ))
367
+ # lastFourChars only needs to be kept up-to-date if we're
368
+ # in CDATA or RCDATA, so ignore it here
369
+ self .tokenQueue .append ({"type" : "Characters" , "data" :
361
370
data + chars })
362
- self .lastFourChars += chars [- 4 :]
363
- self .lastFourChars = self .lastFourChars [- 4 :]
364
371
return True
365
-
372
+
366
373
def entityDataState (self ):
367
374
entity = self .consumeEntity ()
368
375
if entity :
0 commit comments