Skip to content

Commit 83ddb4c

Browse files
committed
inline attribute value quoted states; fix test_tokenizer.py to throw a parse error for end tags with attributes
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40463
1 parent 7995adb commit 83ddb4c

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

src/tokenizer.py

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -257,19 +257,6 @@ def emitCurrentToken(self):
257257
self.tokenQueue.append(self.currentToken)
258258
self.state = self.states["data"]
259259

260-
def attributeValueQuotedStateHandler(self, quoteType):
261-
data = self.stream.char()
262-
if data == quoteType:
263-
self.state = self.states["beforeAttributeName"]
264-
elif data == u"&":
265-
self.processEntityInAttribute()
266-
elif data == EOF:
267-
self.tokenQueue.append({"type": "ParseError", "data":
268-
_("Unexpected end of file in attribute value.")})
269-
self.emitCurrentToken()
270-
else:
271-
self.currentToken["data"][-1][1] += data + self.stream.charsUntil(\
272-
(quoteType, u"&"))
273260

274261
# Below are the various tokenizer states worked out.
275262

@@ -567,14 +554,33 @@ def beforeAttributeValueState(self):
567554
return True
568555

569556
def attributeValueDoubleQuotedState(self):
570-
# AT We could also let self.attributeValueQuotedStateHandler always
571-
# return true and then return that directly here. Not sure what is
572-
# faster or better...
573-
self.attributeValueQuotedStateHandler(u"\"")
557+
data = self.stream.char()
558+
if data == "\"":
559+
self.state = self.states["beforeAttributeName"]
560+
elif data == u"&":
561+
self.processEntityInAttribute()
562+
elif data == EOF:
563+
self.tokenQueue.append({"type": "ParseError", "data":
564+
_("Unexpected end of file in attribute value (\").")})
565+
self.emitCurrentToken()
566+
else:
567+
self.currentToken["data"][-1][1] += data +\
568+
self.stream.charsUntil(("\"", u"&"))
574569
return True
575570

576571
def attributeValueSingleQuotedState(self):
577-
self.attributeValueQuotedStateHandler(u"'")
572+
data = self.stream.char()
573+
if data == "'":
574+
self.state = self.states["beforeAttributeName"]
575+
elif data == u"&":
576+
self.processEntityInAttribute()
577+
elif data == EOF:
578+
self.tokenQueue.append({"type": "ParseError", "data":
579+
_("Unexpected end of file in attribute value (').")})
580+
self.emitCurrentToken()
581+
else:
582+
self.currentToken["data"][-1][1] += data +\
583+
self.stream.charsUntil(("'", u"&"))
578584
return True
579585

580586
def attributeValueUnQuotedState(self):

tests/test_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
#Run the parse error checks
2323
#XXX - ideally want this to be a command line argument
24-
checkParseErrors = True
24+
checkParseErrors = False
2525

2626
def parseTestcase(testString):
2727
testString = testString.split("\n")

tests/test_tokenizer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ def processEmptyTag(self, token):
4646
self.outputTokens.append([u"StartTag", token["name"], token["data"]])
4747

4848
def processEndTag(self, token):
49+
if token["data"]:
50+
self.processParseError(None)
4951
self.outputTokens.append([u"EndTag", token["name"]])
5052

5153
def processComment(self, token):

0 commit comments

Comments
 (0)