Skip to content

Commit 94de3bd

Browse files
committed
reorder some methods to match the scheme set up in Phase and add a comment about a potential merging of the initial and root element phases
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40375
1 parent da9201f commit 94de3bd

File tree

2 files changed

+23
-20
lines changed

2 files changed

+23
-20
lines changed

src/parser.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -192,16 +192,24 @@ def processEndTag(self, name):
192192

193193

194194
class InitialPhase(Phase):
195-
# XXX This phase deals with error handling as well which is currently not
196-
# in the specification.
197-
198-
def processDoctype(self, name, error):
199-
self.tree.insertDoctype(name)
195+
# This phase deals with error handling as well which is currently not
196+
# covered in the specification. The error handling is typically known as
197+
# "quirks mode". It is expected that a future version of HTML5 will define
198+
# this.
199+
#
200+
# AT: Given that, I think it should be possible to merge this phase with the
201+
# root element phase in due course.
202+
def processEOF(self):
200203
self.parser.phase = self.parser.phases["rootElement"]
204+
self.parser.phase.processEOF()
201205

202206
def processComment(self, data):
203207
self.tree.insertComment(data, self.tree.document)
204208

209+
def processDoctype(self, name, error):
210+
self.tree.insertDoctype(name)
211+
self.parser.phase = self.parser.phases["rootElement"]
212+
205213
def processSpaceCharacters(self, data):
206214
self.tree.insertText(data, self.tree.document)
207215

@@ -218,10 +226,6 @@ def processEndTag(self, name):
218226
self.parser.phase = self.parser.phases["rootElement"]
219227
self.parser.phase.processEndTag(name)
220228

221-
def processEOF(self):
222-
self.parser.phase = self.parser.phases["rootElement"]
223-
self.parser.phase.processEOF()
224-
225229

226230
class RootElementPhase(Phase):
227231
# helper methods
@@ -232,6 +236,10 @@ def insertHtmlElement(self):
232236
self.parser.phase = self.parser.phases["beforeHead"]
233237

234238
# other
239+
def processEOF(self):
240+
self.insertHtmlElement()
241+
self.parser.phase.processEOF()
242+
235243
def processComment(self, data):
236244
self.tree.insertComment(data, self.tree.document)
237245

@@ -250,10 +258,6 @@ def processEndTag(self, name):
250258
self.insertHtmlElement()
251259
self.parser.phase.processEndTag(name)
252260

253-
def processEOF(self):
254-
self.insertHtmlElement()
255-
self.parser.phase.processEOF()
256-
257261

258262
class BeforeHeadPhase(Phase):
259263
def __init__(self, parser, tree):
@@ -1585,12 +1589,12 @@ def endTagOther(self, name):
15851589

15861590

15871591
class TrailingEndPhase(Phase):
1588-
def processComment(self, data):
1589-
self.parser.insertCommenr(data, self.tree.document)
1590-
15911592
def processEOF(self):
15921593
pass
15931594

1595+
def processComment(self, data):
1596+
self.parser.insertCommenr(data, self.tree.document)
1597+
15941598
def processSpaceCharacters(self, data):
15951599
self.parser.lastPhase.processCharacters(data)
15961600

src/tokenizer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,6 @@ def closeTagOpenState(self):
406406
elif data == EOF:
407407
self.tokenQueue.append({"type": "ParseError"})
408408
self.tokenQueue.append({"type": "Characters", "data": u"</"})
409-
self.stream.queue.append(data)
410409
self.state = self.states["data"]
411410
else:
412411
self.tokenQueue.append({"type": "ParseError"})
@@ -599,7 +598,7 @@ def commentState(self):
599598
if data == u"-":
600599
self.state = self.states["commentDash"]
601600
elif data == EOF:
602-
self.emitCurrentTokenWithParseError(data)
601+
self.emitCurrentTokenWithParseError()
603602
else:
604603
self.currentToken["data"] += data + self.stream.charsUntil(u"-")
605604
return True
@@ -609,7 +608,7 @@ def commentDashState(self):
609608
if data == u"-":
610609
self.state = self.states["commentEnd"]
611610
elif data == EOF:
612-
self.emitCurrentTokenWithParseError(data)
611+
self.emitCurrentTokenWithParseError()
613612
else:
614613
self.currentToken["data"] += u"-" + data +\
615614
self.stream.charsUntil(u"-")
@@ -627,7 +626,7 @@ def commentEndState(self):
627626
self.tokenQueue.append({"type": "ParseError"})
628627
self.currentToken["data"] += data
629628
elif data == EOF:
630-
self.emitCurrentTokenWithParseError(data)
629+
self.emitCurrentTokenWithParseError()
631630
else:
632631
self.tokenQueue.append({"type": "ParseError"})
633632
self.currentToken["data"] += u"--" + data

0 commit comments

Comments
 (0)