@@ -150,6 +150,19 @@ def reset(self):
150
150
self .beforeRCDataPhase = None
151
151
152
152
self .framesetOK = True
153
+
154
def isHTMLIntegrationPoint(self, element):
    """Return whether ``element`` is an HTML integration point.

    A MathML ``annotation-xml`` element qualifies only when it has an
    ``encoding`` attribute whose ASCII-lowercased value is ``text/html``
    or ``application/xhtml+xml``.  Any other element qualifies when its
    ``(namespace, name)`` pair is listed in
    ``htmlIntegrationPointElements``.
    """
    is_annotation_xml = (element.name == "annotation-xml" and
                         element.namespace == namespaces["mathml"])
    if not is_annotation_xml:
        return (element.namespace, element.name) in htmlIntegrationPointElements

    # annotation-xml: the decision rests entirely on the encoding attribute.
    if "encoding" not in element.attributes:
        return False
    encoding = element.attributes["encoding"].translate(asciiUpper2Lower)
    return encoding in ("text/html", "application/xhtml+xml")
163
+
164
def isMathMLTextIntegrationPoint(self, element):
    """Return whether ``element`` is a MathML text integration point.

    The answer is a membership test of the element's ``(namespace, name)``
    pair against ``mathmlTextIntegrationPointElements``.
    """
    key = (element.namespace, element.name)
    return key in mathmlTextIntegrationPointElements
153
166
154
167
def mainLoop (self ):
155
168
CharactersToken = tokenTypes ["Characters" ]
@@ -158,27 +171,48 @@ def mainLoop(self):
158
171
EndTagToken = tokenTypes ["EndTag" ]
159
172
CommentToken = tokenTypes ["Comment" ]
160
173
DoctypeToken = tokenTypes ["Doctype" ]
161
-
174
+ ParseErrorToken = tokenTypes [ "ParseError" ]
162
175
163
176
for token in self .normalizedTokens ():
164
177
new_token = token
165
178
while new_token is not None :
179
+ currentNode = self .tree .openElements [- 1 ] if self .tree .openElements else None
180
+ currentNodeNamespace = currentNode .namespace if currentNode else None
181
+ currentNodeName = currentNode .name if currentNode else None
182
+
166
183
type = new_token ["type" ]
167
- if type == CharactersToken :
168
- new_token = self .phase .processCharacters (new_token )
169
- elif type == SpaceCharactersToken :
170
- new_token = self .phase .processSpaceCharacters (new_token )
171
- elif type == StartTagToken :
172
- new_token = self .phase .processStartTag (new_token )
173
- elif type == EndTagToken :
174
- new_token = self .phase .processEndTag (new_token )
175
- elif type == CommentToken :
176
- new_token = self .phase .processComment (new_token )
177
- elif type == DoctypeToken :
178
- new_token = self .phase .processDoctype (new_token )
179
- else :
184
+
185
+ if type == ParseErrorToken :
180
186
self .parseError (new_token ["data" ], new_token .get ("datavars" , {}))
181
187
new_token = None
188
+ else :
189
+ if (len (self .tree .openElements ) == 0 or
190
+ currentNodeNamespace == self .tree .defaultNamespace or
191
+ (self .isMathMLTextIntegrationPoint (currentNode ) and
192
+ ((type == StartTagToken and
193
+ token ["name" ] not in frozenset (["mglyph" , "malignmark" ])) or
194
+ type in (CharactersToken , SpaceCharactersToken ))) or
195
+ (currentNodeNamespace == namespaces ["mathml" ] and
196
+ currentNodeName == "annotation-xml" and
197
+ token ["name" ] == "svg" ) or
198
+ (self .isHTMLIntegrationPoint (currentNode ) and
199
+ type in (StartTagToken , CharactersToken , SpaceCharactersToken ))):
200
+ phase = self .phase
201
+ else :
202
+ phase = self .phases ["inForeignContent" ]
203
+
204
+ if type == CharactersToken :
205
+ new_token = phase .processCharacters (new_token )
206
+ elif type == SpaceCharactersToken :
207
+ new_token = phase .processSpaceCharacters (new_token )
208
+ elif type == StartTagToken :
209
+ new_token = phase .processStartTag (new_token )
210
+ elif type == EndTagToken :
211
+ new_token = phase .processEndTag (new_token )
212
+ elif type == CommentToken :
213
+ new_token = phase .processComment (new_token )
214
+ elif type == DoctypeToken :
215
+ new_token = phase .processDoctype (new_token )
182
216
183
217
if (type == StartTagToken and token ["selfClosing" ]
184
218
and not token ["selfClosingAcknowledged" ]):
@@ -379,12 +413,12 @@ def resetInsertionMode(self):
379
413
if nodeName in ("select" , "colgroup" , "head" , "html" ):
380
414
assert self .innerHTML
381
415
416
+ if not last and node .namespace != self .tree .defaultNamespace :
417
+ continue
418
+
382
419
if nodeName in newModes :
383
420
new_phase = self .phases [newModes [nodeName ]]
384
421
break
385
- elif node .namespace in (namespaces ["mathml" ], namespaces ["svg" ]):
386
- new_phase = self .phases ["inForeignContent" ]
387
- break
388
422
elif last :
389
423
new_phase = self .phases ["inBody" ]
390
424
break
@@ -419,7 +453,6 @@ def wrapped(self, *args, **kwargs):
419
453
try :
420
454
info = {"type" :type_names [token ['type' ]]}
421
455
except :
422
- print token
423
456
raise
424
457
if token ['type' ] in constants .tagTokenTypes :
425
458
info ["name" ] = token ['name' ]
@@ -1243,7 +1276,6 @@ def startTagMath(self, token):
1243
1276
self .tree .insertElement (token )
1244
1277
#Need to get the parse error right for the case where the token
1245
1278
#has a namespace not equal to the xmlns attribute
1246
- self .parser .phase = self .parser .phases ["inForeignContent" ]
1247
1279
if token ["selfClosing" ]:
1248
1280
self .tree .openElements .pop ()
1249
1281
token ["selfClosingAcknowledged" ] = True
@@ -1256,7 +1288,6 @@ def startTagSvg(self, token):
1256
1288
self .tree .insertElement (token )
1257
1289
#Need to get the parse error right for the case where the token
1258
1290
#has a namespace not equal to the xmlns attribute
1259
- self .parser .phase = self .parser .phases ["inForeignContent" ]
1260
1291
if token ["selfClosing" ]:
1261
1292
self .tree .openElements .pop ()
1262
1293
token ["selfClosingAcknowledged" ] = True
@@ -1741,7 +1772,7 @@ def processSpaceCharacters(self, token):
1741
1772
self .characterTokens .append (token )
1742
1773
# assert False
1743
1774
1744
def processStartTag(self, token):
    """Handle a start tag by leaving this phase.

    Calls ``self.flushCharacters()``, switches the parser back to
    ``self.originalPhase`` and returns ``token`` unchanged to the caller.
    """
    self.flushCharacters()
    restored_phase = self.originalPhase
    self.parser.phase = restored_phase
    return token
@@ -2298,7 +2329,7 @@ def endTagOther(self, token):
2298
2329
class InForeignContentPhase (Phase ):
2299
2330
breakoutElements = frozenset (["b" , "big" , "blockquote" , "body" , "br" ,
2300
2331
"center" , "code" , "dd" , "div" , "dl" , "dt" ,
2301
- "em" , "embed" , "font" , " h1" , "h2" , "h3" ,
2332
+ "em" , "embed" , "h1" , "h2" , "h3" ,
2302
2333
"h4" , "h5" , "h6" , "head" , "hr" , "i" , "img" ,
2303
2334
"li" , "listing" , "menu" , "meta" , "nobr" ,
2304
2335
"ol" , "p" , "pre" , "ruby" , "s" , "small" ,
@@ -2307,19 +2338,6 @@ class InForeignContentPhase(Phase):
2307
2338
def __init__(self, parser, tree):
    """Initialise the phase by delegating to ``Phase.__init__``.

    ``parser`` and ``tree`` are passed through unchanged; no extra state
    is set up here.
    """
    Phase.__init__(self, parser, tree)
2309
2340
2310
- def isHTMLIntegrationPoint (self , element ):
2311
- if (element .name == "annotation-xml" and
2312
- element .namespace == namespaces ["mathml" ]):
2313
- return ("encoding" in element .attributes and
2314
- element .attributes ["encoding" ].translate (
2315
- asciiUpper2Lower ) in
2316
- ("text/html" , "application/xhtml+xml" ))
2317
- else :
2318
- return (element .namespace , element .name ) in htmlIntegrationPointElements
2319
-
2320
- def isMathMLTextIntegrationPoint (self , element ):
2321
- return (element .namespace , element .name ) in mathmlTextIntegrationPointElements
2322
-
2323
2341
def adjustSVGTagNames (self , token ):
2324
2342
replacements = {u"altglyph" :u"altGlyph" ,
2325
2343
u"altglyphdef" :u"altGlyphDef" ,
@@ -2362,48 +2380,25 @@ def adjustSVGTagNames(self, token):
2362
2380
token ["name" ] = replacements [token ["name" ]]
2363
2381
2364
2382
def processCharacters(self, token):
    """Handle a character token in foreign content.

    A token whose data is exactly U+0000 is rewritten to U+FFFD.  Any
    other token that contains a non-space character clears the parser's
    ``framesetOK`` flag (if it was still set).  The token is then passed
    to ``Phase.processCharacters``.

    The literals are the single code points U+0000 and U+FFFD, with no
    trailing whitespace.  With an extra space the NUL comparison could
    never match a lone NUL token, and the replacement would gain a
    spurious character.
    """
    data = token["data"]
    if data == u"\u0000":
        token["data"] = u"\uFFFD"
    elif (self.parser.framesetOK and
          any(char not in spaceCharacters for char in data)):
        self.parser.framesetOK = False
    Phase.processCharacters(self, token)
2376
2389
2377
- def processEOF (self ):
2378
- reprocess = self .parser .phases ["inBody" ].processEOF ()
2379
- self .parser .resetInsertionMode ()
2380
- return reprocess
2381
-
2382
2390
def processStartTag (self , token ):
2383
2391
currentNode = self .tree .openElements [- 1 ]
2384
- currentNodeNamespace = currentNode .namespace
2385
- currentNodeName = currentNode .name
2386
- if (currentNodeNamespace == self .tree .defaultNamespace or
2387
- (self .isMathMLTextIntegrationPoint (currentNode ) and
2388
- token ["name" ] not in frozenset (["mglyph" , "malignmark" ])) or
2389
- (currentNodeNamespace == namespaces ["mathml" ] and
2390
- currentNodeName == "annotation-xml" and
2391
- token ["name" ] == "svg" ) or
2392
- self .isHTMLIntegrationPoint (currentNode )):
2393
-
2394
- new_token = self .parser .phases ["inBody" ].processStartTag (token )
2395
- self .parser .resetInsertionMode ()
2396
- return new_token
2397
-
2398
- elif token ["name" ] in self .breakoutElements :
2392
+ if (token ["name" ] in self .breakoutElements or
2393
+ (token ["name" ] == "font" and
2394
+ set (token ["data" ].keys ()) | set ("color" , "face" , "size" ))):
2399
2395
self .parser .parseError ("unexpected-html-element-in-foreign-content" ,
2400
2396
token ["name" ])
2401
2397
while (self .tree .openElements [- 1 ].namespace !=
2402
2398
self .tree .defaultNamespace and
2403
- not self .isHTMLIntegrationPoint (self .tree .openElements [- 1 ]) and
2404
- not self .isMathMLTextIntegrationPoint (self .tree .openElements [- 1 ])):
2399
+ not self .parser . isHTMLIntegrationPoint (self .tree .openElements [- 1 ]) and
2400
+ not self .parser . isMathMLTextIntegrationPoint (self .tree .openElements [- 1 ])):
2405
2401
self .tree .openElements .pop ()
2406
- self .parser .resetInsertionMode ()
2407
2402
return token
2408
2403
2409
2404
else :
@@ -2420,33 +2415,29 @@ def processStartTag(self, token):
2420
2415
token ["selfClosingAcknowledged" ] = True
2421
2416
2422
2417
def processEndTag(self, token):
    """Handle an end tag while the current node is in foreign content.

    Walks the stack of open elements from the current node downwards:

    * when a node's ASCII-lowercased name equals the token name, elements
      are popped up to and including that node and ``None`` is returned;
    * when a node in the default namespace is reached first, the token is
      handed to the parser's current phase and that result is returned.

    A parse error is reported up front if the current node's name differs
    from the token name.
    """
    nodeIndex = len(self.tree.openElements) - 1
    node = self.tree.openElements[-1]
    if node.name != token["name"]:
        # Pass the variables as a mapping, matching the
        # ``token.get("datavars", {})`` form the main loop hands to
        # parseError, rather than a bare string.
        self.parser.parseError("unexpected-end-tag",
                               {"name": token["name"]})

    while True:
        if node.name.translate(asciiUpper2Lower) == token["name"]:
            # If the parser is currently in the "inTableText" phase,
            # flush it and restore its original phase before the stack
            # is modified.
            if self.parser.phase == self.parser.phases["inTableText"]:
                self.parser.phase.flushCharacters()
                self.parser.phase = self.parser.phase.originalPhase
            while self.tree.openElements.pop() != node:
                assert self.tree.openElements
            return None

        nodeIndex -= 1
        node = self.tree.openElements[nodeIndex]
        if node.namespace == self.tree.defaultNamespace:
            # Reached a default-namespace element: let the current phase
            # deal with the end tag.
            return self.parser.phase.processEndTag(token)
2450
2441
2451
2442
2452
2443
class AfterBodyPhase (Phase ):
0 commit comments