@@ -39,10 +39,11 @@ class HTMLTokenizer:
39
39
# XXX need to fix documentation
40
40
41
41
def __init__ (self , stream , encoding = None , parseMeta = True , useChardet = True ,
42
- lowercaseElementName = True , lowercaseAttrName = True ):
42
+ lowercaseElementName = True , lowercaseAttrName = True , parser = None ):
43
43
44
44
self .stream = HTMLInputStream (stream , encoding , parseMeta , useChardet )
45
-
45
+ self .parser = parser
46
+
46
47
#Perform case conversions?
47
48
self .lowercaseElementName = lowercaseElementName
48
49
self .lowercaseAttrName = lowercaseAttrName
@@ -1062,6 +1063,19 @@ def markupDeclarationOpenState(self):
1062
1063
"correct" : True }
1063
1064
self .state = self .doctypeState
1064
1065
return True
1066
+ elif (charStack [- 1 ] == "[" and
1067
+ self .parser is not None and
1068
+ self .parser .phase == self .parser .phases ["inForeignContent" ] and
1069
+ self .parser .tree .openElements [- 1 ].namespace != self .parser .tree .defaultNamespace ):
1070
+ matched = True
1071
+ for expected in ["C" , "D" , "A" , "T" , "A" , "[" ]:
1072
+ charStack .append (self .stream .char ())
1073
+ if charStack [- 1 ] != expected :
1074
+ matched = False
1075
+ break
1076
+ if matched :
1077
+ self .state = self .cdataSectionState
1078
+ return True
1065
1079
1066
1080
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
1067
1081
"expected-dashes-or-doctype" })
@@ -1563,3 +1577,29 @@ def bogusDoctypeState(self):
1563
1577
else :
1564
1578
pass
1565
1579
return True
1580
+
1581
def cdataSectionState(self):
    """Consume a CDATA section body up to its closing "]]>" (or EOF).

    Everything read before the terminator is queued as a single
    "Characters" token (no token is queued when that text is empty), the
    tokenizer state is switched back to ``self.dataState`` and True is
    returned.

    A run of more than two "]" immediately before ">" still closes the
    section: only the final two brackets belong to the terminator, the
    earlier ones are ordinary text ("a]]]>" yields "a]").  On EOF the
    brackets read so far are kept as text and the section ends.
    """
    data = []
    while True:
        # charsUntil stops just before the next "]" (or at end of input),
        # so the character read next is either "]" or EOF.
        data.append(self.stream.charsUntil(u"]"))
        char = self.stream.char()
        if char == EOF:
            break

        # Count the whole run of consecutive "]" characters; `char` ends
        # up holding the first character after the run.
        brackets = 1
        char = self.stream.char()
        while char == "]":
            brackets += 1
            char = self.stream.char()

        if char == ">" and brackets >= 2:
            # The last two brackets plus ">" form the terminator; any
            # extra leading brackets are literal text.
            data.append("]" * (brackets - 2))
            break

        # Not a terminator: the brackets are literal text.
        data.append("]" * brackets)
        if char == EOF:
            break
        # Keep the character that broke the run and go on scanning.
        data.append(char)

    data = "".join(data)
    if data:
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": data})
    self.state = self.dataState
    return True
0 commit comments