Skip to content

Commit 981cbd1

Browse files
committed
Update tokenizer.
1 parent bd57c61 commit 981cbd1

File tree

4 files changed

+492
-136
lines changed

4 files changed

+492
-136
lines changed

src/html5lib/html5parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,11 @@ def parseRCDataRawtext(self, token, contentType):
398398
assert contentType in ("RAWTEXT", "RCDATA")
399399

400400
element = self.tree.insertElement(token)
401-
self.tokenizer.contentModelFlag = contentModelFlags[contentType]
401+
402+
if contentType == "RAWTEXT":
403+
self.tokenizer.state = self.tokenizer.rawtextState
404+
else:
405+
self.tokenizer.state = self.tokenizer.rcdataState
402406

403407
self.originalPhase = self.phase
404408

src/html5lib/inputstream.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,20 @@ def charsUntil(self, characters, opposite = False):
449449
r = u"".join(rv)
450450
return r
451451

452+
def charsUntilEOF(self):
    """Return every remaining character in the stream, up to EOF.

    Starts with the unread tail of the current chunk (everything from
    ``self.chunkOffset`` onwards) and then keeps calling
    ``self.readChunk()`` until it returns a false value, collecting the
    unread part of each chunk along the way.

    Returns:
        A single string made by joining all collected pieces; an empty
        string when nothing is left to read.
    """
    # NOTE(review): this relies on readChunk() replacing self.chunk and
    # resetting self.chunkOffset each time it is called (including the
    # final call that reports EOF) -- confirm against readChunk.
    rv = []

    while True:
        # Take whatever has not been consumed from the current chunk.
        rv.append(self.chunk[self.chunkOffset:])
        if not self.readChunk():
            # Reached EOF
            break

    # Join once at the end instead of concatenating inside the loop.
    return u"".join(rv)
465+
452466
def unget(self, char):
453467
# Only one character is allowed to be ungotten at once - it must
454468
# be consumed again before any further call to unget

0 commit comments

Comments
 (0)