Skip to content

Commit 7d29315

Browse files
committed
Change CDATA to RAWTEXT (spec r3562). This breaks test-compat.
1 parent 8911496 commit 7d29315

File tree

3 files changed

+26
-26
lines changed

3 files changed

+26
-26
lines changed

src/html5lib/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@
267267
contentModelFlags = {
268268
"PCDATA":0,
269269
"RCDATA":1,
270-
"CDATA":2,
270+
"RAWTEXT":2,
271271
"PLAINTEXT":3
272272
}
273273

src/html5lib/html5parser.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def __init__(self, tree = simpletree.TreeBuilder,
7777
# XXX "inHeadNoscript": InHeadNoScriptPhase(self, self.tree),
7878
"afterHead": AfterHeadPhase(self, self.tree),
7979
"inBody": InBodyPhase(self, self.tree),
80-
"inCDataRCData": InCDataRCDataPhase(self, self.tree),
80+
"inRCDataRawtext": InRCDataRawtextPhase(self, self.tree),
8181
"inTable": InTablePhase(self, self.tree),
8282
"inTableText": InTableTextPhase(self, self.tree),
8383
"inCaption": InCaptionPhase(self, self.tree),
@@ -126,7 +126,7 @@ def reset(self):
126126
if self.innerHTML in cdataElements:
127127
self.tokenizer.contentModelFlag = tokenizer.contentModelFlags["RCDATA"]
128128
elif self.innerHTML in rcdataElements:
129-
self.tokenizer.contentModelFlag = tokenizer.contentModelFlags["CDATA"]
129+
self.tokenizer.contentModelFlag = tokenizer.contentModelFlags["RAWTEXT"]
130130
elif self.innerHTML == 'plaintext':
131131
self.tokenizer.contentModelFlag = tokenizer.contentModelFlags["PLAINTEXT"]
132132
else:
@@ -391,18 +391,18 @@ def resetInsertionMode(self):
391391
self.phase = self.phases["inBody"]
392392
break
393393

394-
def parseRCDataCData(self, token, contentType):
395-
"""Generic (R)CDATA Parsing algorithm
396-
contentType - RCDATA or CDATA
394+
def parseRCDataRawtext(self, token, contentType):
395+
"""Generic RCDATA/RAWTEXT Parsing algorithm
396+
contentType - RCDATA or RAWTEXT
397397
"""
398-
assert contentType in ("CDATA", "RCDATA")
398+
assert contentType in ("RAWTEXT", "RCDATA")
399399

400400
element = self.tree.insertElement(token)
401401
self.tokenizer.contentModelFlag = contentModelFlags[contentType]
402402

403403
self.originalPhase = self.phase
404404

405-
self.phase = self.phases["inCDataRCData"]
405+
self.phase = self.phases["inRCDataRawtext"]
406406

407407
class Phase(object):
408408
"""Base class for helper object that implements each phase of processing
@@ -746,16 +746,16 @@ def startTagMeta(self, token):
746746
self.parser.tokenizer.stream.changeEncoding(codec)
747747

748748
def startTagTitle(self, token):
749-
self.parser.parseRCDataCData(token, "RCDATA")
749+
self.parser.parseRCDataRawtext(token, "RCDATA")
750750

751751
def startTagNoScriptNoFramesStyle(self, token):
752752
#Need to decide whether to implement the scripting-disabled case
753-
self.parser.parseRCDataCData(token, "CDATA")
753+
self.parser.parseRCDataRawtext(token, "RAWTEXT")
754754

755755
def startTagScript(self, token):
756-
#I think this is equivalent to the CDATA stuff since we don't execute script
756+
#I think this is equivalent to the RAWTEXT stuff since we don't execute script
757757
#self.tree.insertElement(token)
758-
self.parser.parseRCDataCData(token, "CDATA")
758+
self.parser.parseRCDataRawtext(token, "RAWTEXT")
759759

760760
def startTagOther(self, token):
761761
self.anythingElse()
@@ -887,7 +887,7 @@ def __init__(self, parser, tree):
887887
("isindex", self.startTagIsIndex),
888888
("textarea", self.startTagTextarea),
889889
("iframe", self.startTagIFrame),
890-
(("noembed", "noframes", "noscript"), self.startTagCdata),
890+
(("noembed", "noframes", "noscript"), self.startTagRawtext),
891891
("select", self.startTagSelect),
892892
(("rp", "rt"), self.startTagRpRt),
893893
(("option", "optgroup"), self.startTagOpt),
@@ -1100,7 +1100,7 @@ def startTagXmp(self, token):
11001100
self.endTagP(impliedTagToken("p"))
11011101
self.tree.reconstructActiveFormattingElements()
11021102
self.parser.framesetOK = False
1103-
self.parser.parseRCDataCData(token, "CDATA")
1103+
self.parser.parseRCDataRawtext(token, "RAWTEXT")
11041104

11051105
def startTagTable(self, token):
11061106
if self.parser.compatMode != "quirks":
@@ -1179,11 +1179,11 @@ def startTagTextarea(self, token):
11791179

11801180
def startTagIFrame(self, token):
11811181
self.parser.framesetOK = False
1182-
self.startTagCdata(token)
1182+
self.startTagRawtext(token)
11831183

1184-
def startTagCdata(self, token):
1184+
def startTagRawtext(self, token):
11851185
"""iframe, noembed noframes, noscript(if scripting enabled)"""
1186-
self.parser.parseRCDataCData(token, "CDATA")
1186+
self.parser.parseRCDataRawtext(token, "RAWTEXT")
11871187

11881188
def startTagOpt(self, token):
11891189
if self.tree.elementInScope("option"):
@@ -1520,7 +1520,7 @@ def endTagOther(self, token):
15201520
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
15211521
break
15221522

1523-
class InCDataRCDataPhase(Phase):
1523+
class InRCDataRawtextPhase(Phase):
15241524
def __init__(self, parser, tree):
15251525
Phase.__init__(self, parser, tree)
15261526
self.startTagHandler = utils.MethodDispatcher([])
@@ -1540,7 +1540,7 @@ def processEOF(self):
15401540
self.parser.phase.processEOF()
15411541

15421542
def startTagOther(self, token):
1543-
assert False, "Tried to process start tag %s in (R)CDATA mode"%name
1543+
assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode"%name
15441544

15451545
def endTagScript(self, token):
15461546
node = self.tree.openElements.pop()

src/html5lib/tokenizer.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ def dataState(self):
265265

266266
# Keep a charbuffer to handle the escapeFlag
267267
if (self.contentModelFlag in
268-
(contentModelFlags["CDATA"], contentModelFlags["RCDATA"])):
268+
(contentModelFlags["RAWTEXT"], contentModelFlags["RCDATA"])):
269269
if len(self.lastFourChars) == 4:
270270
self.lastFourChars.pop(0)
271271
self.lastFourChars.append(data)
@@ -276,20 +276,20 @@ def dataState(self):
276276
not self.escapeFlag):
277277
self.state = self.entityDataState
278278
elif (data == "-" and self.contentModelFlag in
279-
(contentModelFlags["CDATA"], contentModelFlags["RCDATA"]) and
279+
(contentModelFlags["RAWTEXT"], contentModelFlags["RCDATA"]) and
280280
not self.escapeFlag and "".join(self.lastFourChars) == "<!--"):
281281
self.escapeFlag = True
282282
self.tokenQueue.append({"type": tokenTypes["Characters"],
283283
"data":data})
284284
elif (data == "<" and (self.contentModelFlag ==
285285
contentModelFlags["PCDATA"]
286286
or (self.contentModelFlag in
287-
(contentModelFlags["CDATA"],
287+
(contentModelFlags["RAWTEXT"],
288288
contentModelFlags["RCDATA"]) and
289289
self.escapeFlag == False))):
290290
self.state = self.tagOpenState
291291
elif (data == ">" and self.contentModelFlag in
292-
(contentModelFlags["CDATA"], contentModelFlags["RCDATA"]) and
292+
(contentModelFlags["RAWTEXT"], contentModelFlags["RCDATA"]) and
293293
self.escapeFlag and "".join(self.lastFourChars)[1:] == "-->"):
294294
self.escapeFlag = False
295295
self.tokenQueue.append({"type": tokenTypes["Characters"], "data":data})
@@ -308,7 +308,7 @@ def dataState(self):
308308
# any <!-- or --> sequences
309309
else:
310310
if (self.contentModelFlag in
311-
(contentModelFlags["CDATA"], contentModelFlags["RCDATA"])):
311+
(contentModelFlags["RAWTEXT"], contentModelFlags["RCDATA"])):
312312
chars = self.stream.charsUntil((u"&", u"<", u">", u"-"))
313313
self.lastFourChars += chars[-4:]
314314
self.lastFourChars = self.lastFourChars[-4:]
@@ -358,7 +358,7 @@ def tagOpenState(self):
358358
self.stream.unget(data)
359359
self.state = self.dataState
360360
else:
361-
# We know the content model flag is set to either RCDATA or CDATA
361+
# We know the content model flag is set to either RCDATA or RAWTEXT
362362
# now because this state can never be entered with the PLAINTEXT
363363
# flag.
364364
if data == u"/":
@@ -371,7 +371,7 @@ def tagOpenState(self):
371371

372372
def closeTagOpenState(self):
373373
if (self.contentModelFlag in (contentModelFlags["RCDATA"],
374-
contentModelFlags["CDATA"])):
374+
contentModelFlags["RAWTEXT"])):
375375

376376
charStack = []
377377
if self.currentToken:

0 commit comments

Comments
 (0)